library(conos)
library(magrittr)
library(dplyr)
library(cacoa) # github.com/kharchenkolab/cacoa
## Warning: replacing previous import 'ape::where' by 'dplyr::where' when loading
## 'cacoa'
library(sccore)
library(scHelper) # github.com/rrydbirk/scHelper
library(qs)
library(ggplot2)
library(ggsci)
library(cowplot)
library(ggpubr)
library(STRINGdb)
library(reshape2)
library(ggforce)
library(ggpmisc)
library(RColorBrewer)
library(CRMetrics)
library(ggrepel)
library(circlize)
library(ComplexHeatmap)
library(stringr)
library(rstatix)
library(slingshot)
## Comparisons
comp <- list(c("CTRL","MSA"),
c("CTRL","PD"),
c("MSA","PD"))
comp.long <- list(c("CTRL vs. MSA","CTRL vs. PD"),
c("CTRL vs. MSA","PD vs. MSA"),
c("CTRL vs. PD","PD vs. MSA"))
# Palettes
## Major celltypes
anno.neurons <- qread("anno_neurons.qs")
anno.neurons.major <- anno.neurons %>%
collapseAnnotation("GABA") %>%
collapseAnnotation("GLU") %>%
renameAnnotation("GABA", "Inh. neurons") %>%
renameAnnotation("GLU", "Exc. neurons") %>%
collapseAnnotation("MSN")
## Collapsing 11 labels containing 'GABA' in their name into one label.
## Collapsing 2 labels containing 'GLU' in their name into one label.
## Collapsing 3 labels containing 'MSN' in their name into one label.
anno <- qread("anno_major.qs")
anno.major <- anno[!names(anno) %in% names(anno.neurons.major) & !anno == "Neurons"] %>%
{factor(c(., anno.neurons.major))}
# qsave(anno.major, "/work/msa/03_annotations/anno_0final.qs")
pal.major <- brewer.pal(n = 10, "Set1") %>%
c("lightblue3") %>%
setNames(levels(anno.major)[c(9,8,3,5,10,6:7,2,1,4)])
## Warning in brewer.pal(n = 10, "Set1"): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Condition
pal.major %<>% c(c("yellow", brewer.pal(n = 6, "Accent")[5:6]) %>% setNames(c("CTRL","PD","MSA")))
## Comparisons
pal.major %<>% c(brewer.pal(n = 10, "Paired")[c(6,8,10)] %>% setNames(c("CTRL vs. MSA","CTRL vs. PD","PD vs. MSA")))
## Neurons
pal.major %<>% c(setNames(c("navy",
"mediumblue",
"lightslateblue",
"lightskyblue",
"lightblue",
"lightseagreen",
"blue",
"cadetblue1",
"cyan",
"cyan4",
"aquamarine2",
"lightcoral",
"brown1",
"orange",
"darkorange4",
"lightgoldenrod3"),
levels(anno.neurons)))
## Glia
anno.glia <- c("anno_astro.qs",
"anno_oligo.qs",
"anno_opc.qs") %>%
lapply(qread) %>%
Reduce(c, .) %>%
factor() %>%
renameAnnotation("Homeostatic_astrocytes", "AS_homeostatic") %>%
renameAnnotation("Reactive_astrocytes", "AS_reactive") %>%
renameAnnotation("Homeostatic_LINC01608", "OL_LINC01608") %>%
renameAnnotation("Homeostatic_SLC5A11", "OL_SLC5A11") %>%
renameAnnotation("Reactive_SCGZ", "OL_SGCZ")
pal.major %<>% c(setNames(c("pink",
pal.major["Astrocytes"],
pal.major["Oligodendrocytes"],
"chartreuse",
"darkolivegreen",
"brown4",
"coral"),
levels(anno.glia)))
## Microglia
anno.micro <- qread("anno_micro_pvm.qs") %>%
renameAnnotation("Steady-state","MIC_steady-state") %>%
renameAnnotation("Intermediate1","MIC_intermediate1") %>%
renameAnnotation("Intermediate2","MIC_intermediate2") %>%
renameAnnotation("Activated","MIC_activated")
pal.major %<>% c(setNames(c(pal.major["Microglia"],
"maroon4",
"magenta",
"pink3",
pal.major["PVMs"]),
levels(anno.micro)))
## Phago. assay
pal.major %<>% c(setNames(c("purple",
"black",
pal.major[c("CTRL","PD","MSA")]),
c("PBS",
"LPS",
"CTRL CSF",
"PD CSF",
"MSA CSF")))
## Houston assay
pal.major %<>% c(setNames(c(pal.major["PBS"],
pal.major["CTRL"],
"yellow3",
"orange",
pal.major["PD"],
"cyan4",
"navyblue",
pal.major["MSA"],
"pink3",
"red4"),
c("Microglia medium",
"CTRL CSF, no dil.",
"CTRL CSF, 1:2 dil.",
"CTRL CSF, 1:4 dil.",
"PD CSF, no dil.",
"PD CSF, 1:2 dil.",
"PD CSF, 1:4 dil.",
"MSA CSF, no dil.",
"MSA CSF, 1:2 dil.",
"MSA CSF, 1:4 dil.")))
# Load major Conos object
con <- qread("con_major.qs", nthreads = 10)
tt <- Sys.time()
Define helper functions
getOntWithFamily <- function(cao, comp) {
fams <- cao$test.results[["GSEA"]]$families
df.all <- list(cao$.__enclos_env__$private$getOntologyPvalueResults(name = "GSEA", genes = "up", p.adj = 1, q.value = 1),
cao$.__enclos_env__$private$getOntologyPvalueResults(name = "GSEA", genes = "down", p.adj = 1, q.value = 1)) %>%
bind_rows() %>%
mutate(logp = -log10(p.adjust),
comp = comp)
df <- cacoa:::getOntologyFamilyChildren(df.all, fams=fams, type = "GSEA", subtype = "BP")
return(list(df.all = df.all,
df = df,
fams = fams))
}
lianaCircos <- function(df,
top.interactions = 30,
text.size = 1,
pal = pal.major,
cell.types = c("Astrocytes",
"Immune",
"Microglia",
"Neurons",
"OPCs",
"Oligodendrocytes",
"PVMs",
"Pericytes/endothelial"),
big.gap = 5,
small.gap = 2,
arrow.width = 3,
link.ramp.rel = T,
link.sort = F,
scale = F,
arrow.head.width = 0.3,
arrow.head.length = 0.3,
link.ramp.col = c("navy", "grey", "firebrick")) {
input_df <- df %>%
slice_max(order_by = score, n = top.interactions) %>%
mutate(target = paste0(target, " ")) %>%
mutate(source_lig = paste0(source, "|", ligand),
target_rec = paste0(target, "|", receptor))
if (link.ramp.rel) {
arr_wd <- rep(arrow.width, nrow(input_df))
} else {
arr_wd <- (((input_df$score - min(input_df$score))/(max(input_df$score) - min(input_df$score))) * (arrow.width)) + 1
}
# Colors and segments
anno.col <- setNames(pal,
cell.types) %>%
c(., "Oligodendrocytes " = unname(.["Oligodendrocytes"]))
cell_cols <- anno.col[unique(c(unique(input_df$source), unique(input_df$target), "Oligodendrocytes "))]
link_cols <- c()
if (!link.ramp.rel) {
for (i in input_df$source_lig) {
link_cols <- c(link_cols, cell_cols[str_extract(i,
"[^|]+")])
}
} else {
input_df %<>%
arrange(score)
df.down <- input_df %>% filter(score <= 0)
link_down <- colorRampPalette(c(link.ramp.col[1], link.ramp.col[2]))(nrow(df.down))
df.up <- input_df %>% filter(score > 0)
link_up <- colorRampPalette(c(link.ramp.col[2], link.ramp.col[3]))(nrow(df.up))
link_cols <- c(link_down, link_up)
}
segments <- unique(c(paste0(input_df$source, "|", input_df$ligand),
paste0(input_df$target, "|", input_df$receptor)))
grp <- str_extract(segments, "[^|]+") %>%
setNames(segments)
# Redo colors
cell_cols2 <- grp
for (i in unique(grp)) {
cell_cols2[cell_cols2 == i] <- cell_cols[i]
}
# Plot
input_df %>%
select(source_lig, target_rec, score) %>%
chordDiagram(directional = 1,
group = grp,
scale = scale,
diffHeight = 0.005,
direction.type = c("arrows"),
link.arr.type = "triangle",
annotationTrack = c(),
preAllocateTracks = list(
list(track.height = 0.05),
list(track.height = 0.25),
list(track.height = 0.05)),
big.gap = big.gap,
transparency = 1,
link.arr.lwd = arr_wd,
link.arr.col = link_cols,
link.arr.length = arrow.head.length,
link.arr.width = arrow.head.width,
small.gap = small.gap
)
circos.track(track.index = 2, panel.fun = function(x, y) {
circos.text(CELL_META$xcenter,
CELL_META$ylim[1],
str_extract(CELL_META$sector.index, "[^|]+$"),
facing = "clockwise",
niceFacing = TRUE,
adj = c(0, 0.55),
cex = 1)
}, bg.border = NA)
# Split segments
for (l in segments) {
highlight.sector(l, track.index = 3, col = cell_cols2[l])
}
# Add ligand/receptor track
## Ligand
highlight.sector(input_df$source_lig,
track.index = 1,
col = "black",
text = "Ligands",
cex = 1,
text.col = "white",
niceFacing = TRUE)
## Receptor
highlight.sector(input_df$target_rec,
track.index = 1,
col = "white",
text = "Receptors",
cex = 1,
text.col = "black",
border = "black",
niceFacing = TRUE)
# Legends
minmax <- input_df %>%
pull(score) %>%
{pmax(abs(min(.)), max(.))} %>%
formatC(digits = 1) %>%
as.numeric()
col.range = c(-minmax, 0, minmax)
lgd_links = Legend(at = col.range,
col_fun = colorRamp2(col.range, link.ramp.col),
title_position = "topleft",
title = "Links")
lgd_ct <- Legend(labels = unique(c(input_df$source, input_df$target)),
title = "Cell type",
type = "points",
legend_gp = gpar(col = "transparent"),
background = cell_cols[unique(c(input_df$source, input_df$target))])
lgd_list_vertical = packLegend(lgd_ct, lgd_links)
draw(lgd_list_vertical,
just = c("left", "bottom"),
x = unit(5, "mm"),
y = unit(5, "mm"))
circos.clear()
}
getTscanTrajectory <- function(con, anno) {
requireNamespace("TSCAN", quietly = T)
emb <- con$embedding[names(anno), ]
anno %<>% .[rownames(emb)]
cent.ids <- emb %>%
rownames() %>%
split(anno)
centroids <- cent.ids %>%
lapply(\(cid) emb[cid, ]) %>%
lapply(colMeans) %>%
bind_rows() %>%
t() %>%
`colnames<-`(c("UMAP1","UMAP2"))
mst <- centroids %>%
TSCAN::createClusterMST(clusters = NULL)
line.data <- TSCAN::reportEdges(centroids, mst = mst, clusters = NULL)
return(line.data)
}
plotGenePseudoBulk <- function(gene, cm.pseudo, legend = T) {
idx <- cm.pseudo %>%
colnames() %>%
data.frame(id = .) %>%
mutate(condition = strsplit(id, "_|!!") %>% sget(1),
ct = strsplit(id, "!!") %>% sget(2)) %>%
mutate(ord = order(condition, ct))
x <- cm.pseudo %>%
.[match(gene, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
.[idx$ord]
plot.dat <- x %>%
{data.frame(sample = names(.),
value = unname(.))} %>%
mutate(anno = strsplit(sample, "!!") %>%
sget(2),
condition = strsplit(sample, "!!|_") %>%
sget(1)) %>%
mutate(anno = factor(anno))
stat.test <- plot.dat %>%
group_by(anno) %>%
rstatix::wilcox_test(value ~ condition) %>%
rstatix::add_xy_position(x = "anno", step.increase = 0.1)
p <- plot.dat %>%
ggplot(aes(anno, value)) +
geom_boxplot(aes(fill = condition)) +
theme_bw() +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
axis.ticks.x = element_line()) +
labs(x = "", y = "Normalized pseudobulk expression", fill = "", title = paste0(gene, " expression")) +
scale_fill_manual(values = ggsci::pal_jama()(3)) +
stat_compare_means(aes(fill = condition), method = "kruskal", label = "p.signif") +
stat_pvalue_manual(stat.test)
if (!legend) p <- p + guides(fill = "none")
return(p)
}
con$plotGraph(groups = anno.major,
embedding = "UMAP",
size = 0.1,
palette = pal.major,
font.size = 3,
raster = T,
show.labels = T,
plot.na = F,
show.legend = T,
legend.title = "Cell type",
alpha = 0.05) +
dotSize(3) +
labs(x="UMAP1", y= "UMAP2") +
theme(line = element_blank())
c("RBFOX3","MOG","VCAN","AQP4","CSF1R","PDGFRB","PTPRC","MRC1","GAD1","SLC17A7","PPP1R1B") %>%
sort() %>%
lapply(function(g) con$plotGraph(embedding = "UMAP", gene=g, title=g, size=0.1, plot.na = F, palette = colorRampPalette(c("grey90","firebrick"))) + theme(line = element_blank())) %>%
cowplot::plot_grid(plotlist=., ncol=2)
cm.merged <- con$getJointCountMatrix()
markers <- c("AQP4","PTPRC","CSF1R","RBFOX3","SLC17A7","GAD1","PPP1R1B","MOG","VCAN","PDGFRB","MRC1")
dotPlot(markers,
cm.merged,
anno.major %>%
factor(., levels = sort(levels(.))[c(1,3,5,2,4,6,7:10)]),
cols = c("white","firebrick"),
gene.order = markers)
cao_msa <- qread("cao_major_msa.qs", nthreads = 10)
cao_pd <- qread("cao_major_pd.qs", nthreads = 10)
cao_dis <- qread("cao_major_dis.qs", nthreads = 10)
spc <- con$getDatasetPerCell()
cpc <- spc %>%
as.character() %>%
grepl.replace(c("CTRL","MSA","PD")) %>%
`names<-`(names(spc))
con$plotGraph(groups = cpc,
embedding = "UMAP",
size = 0.1,
alpha = 0.05,
palette = pal.major,
font.size = 3,
raster = T,
show.labels = T,
plot.na = F,
mark.groups = F,
show.legend = T) +
labs(x="UMAP1", y= "UMAP2", col = "") +
theme(legend.position = "bottom",
line = element_blank()) +
dotSize(3)
cao_msa$plotCellLoadings(show.pvals = F,
alpha = 0)$data %>%
ggplot(aes(ind, values, fill = ind)) +
geom_hline(yintercept = 0, col = "black") +
geom_violin() +
coord_flip() +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none",
line = element_blank()) +
scale_y_continuous(breaks = c(-1,0,1),
limits = c(-1,1),
labels = c("-1\nCTRL",0,"1\nMSA")) +
labs(x = "", y = "")
cao_pd$plotCellLoadings(show.pvals = F,
alpha = 0)$data %>%
ggplot(aes(ind, values, fill = ind)) +
geom_hline(yintercept = 0, col = "black") +
geom_violin() +
coord_flip() +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none",
line = element_blank()) +
scale_y_continuous(breaks = c(-1,0,1),
limits = c(-1,1),
labels = c("-1\nCTRL",0,"1\nPD")) +
geom_vline(xintercept = 7.5, col = "red") +
labs(x = "", y = "")
## Warning: Removed 15 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
cao_dis$plotCellLoadings(show.pvals = F,
alpha = 0)$data %>%
ggplot(aes(ind, values, fill = ind)) +
geom_hline(yintercept = 0, col = "black") +
geom_violin() +
coord_flip() +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none",
line = element_blank()) +
scale_y_continuous(breaks = c(-1,0,1),
limits = c(-1,1),
labels = c("-1\nPD",0,"1\nMSA")) +
geom_vline(xintercept = 5.5, col = "red") +
labs(x = "", y = "")
cao_msa$cell.groups.palette <- pal.major
cao_pd$cell.groups.palette <- pal.major
cao_dis$cell.groups.palette <- pal.major
cao_msa$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")
cao_pd$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")
cao_dis$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
cao <- qread("cao_neurons.qs", nthreads = 10)
cao_msa <- qread("cao_neurons_msa.qs", nthreads = 10)
cao_pd <- qread("cao_neurons_pd.qs", nthreads = 10)
cao_dis <- qread("cao_neurons_dis.qs", nthreads = 10)
cao$plotEmbedding(groups = cao$cell.groups,
size = 0.1,
palette = pal.major,
font.size = 3,
raster = T,
show.labels = T,
plot.na = F,
alpha = 0.1) +
theme(line = element_blank()) +
labs(x="largeVis1", y= "largeVis2")
cm.merged <- cao$data.object$getJointCountMatrix()
markers <- c("GAD1","GAD2","PPP1R1B","SLC17A7","CHAT","CALB2","DCC","ID2","MEIS2","ST18","NKX2-1","NR2F2","PAX6","PVALB","RXFP1","SST","VIP","RELN","SATB2","DRD1","DRD2","FOXP2", "LRP8", "VLDLR")
dotPlot(markers,
cm.merged,
anno.neurons,
cols = c("white","firebrick"),
gene.order = markers)
cao_msa$plotCellLoadings(show.pvals = F,
alpha = 0)$data %>%
ggplot(aes(ind, values, fill = ind)) +
geom_hline(yintercept = 0, col = "black") +
geom_violin() +
coord_flip() +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none",
line = element_blank()) +
scale_y_continuous(breaks = c(-1,0,1),
limits = c(-1,1),
labels = c("-1\nCTRL",0,"1\nMSA")) +
labs(x = "", y = "")
cao_pd$plotCellLoadings(show.pvals = F,
alpha = 0)$data %>%
ggplot(aes(ind, values, fill = ind)) +
geom_hline(yintercept = 0, col = "black") +
geom_violin() +
coord_flip() +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none",
line = element_blank()) +
scale_y_continuous(breaks = c(-1,0,1),
limits = c(-1,1),
labels = c("-1\nCTRL",0,"1\nPD")) +
geom_vline(xintercept = 13.5, col = "red") +
labs(x = "", y = "")
cao_dis$plotCellLoadings(show.pvals = F,
alpha = 0)$data %>%
ggplot(aes(ind, values, fill = ind)) +
geom_hline(yintercept = 0, col = "black") +
geom_violin() +
coord_flip() +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none",
line = element_blank()) +
scale_y_continuous(breaks = c(-1,0,1),
limits = c(-1,1),
labels = c("-1\nPD",0,"1\nMSA")) +
labs(x = "", y = "")
sample.groups <- cao$sample.groups
cao$plotCellDensity(show.cell.groups = F)$CTRL +
geom_circle(aes(x0 = 0.75, y0 = -4.5, r = 1)) +
geom_circle(aes(x0 = 3.8, y0 = -1.1, r = 0.5), col = "cyan3") +
theme(line = element_blank())
# MSA
cao$sample.groups <- sample.groups %>%
names() %>%
grepl.replace(c("CTRL","MSA","PD")) %>%
`names<-`(sample.groups %>% names()) %>%
.[. %in% c("CTRL","MSA")]
cao$target.level <- "MSA"
cao$estimateCellDensity(method = "graph", name = "msa.density")
cao$estimateDiffCellDensity(type = "subtract", name = "msa.density")
cao$plotCellDensity(show.cell.groups = F, name = "msa.density")$MSA +
geom_circle(aes(x0 = 0.75, y0 = -4.5, r = 1)) +
geom_circle(aes(x0 = 3.8, y0 = -1.1, r = 0.5), col = "cyan3") +
theme(line = element_blank())
# PD
cao$sample.groups <- sample.groups %>%
names() %>%
grepl.replace(c("CTRL","MSA","PD")) %>%
`names<-`(sample.groups %>% names()) %>%
.[. %in% c("CTRL","PD")]
cao$target.level <- "PD"
cao$estimateCellDensity(method = "graph", name = "pd.density")
cao$estimateDiffCellDensity(type = "subtract", name = "pd.density")
cao$plotCellDensity(show.cell.groups = F, name = "pd.density")$PD +
geom_circle(aes(x0 = 0.75, y0 = -4.5, r = 1)) +
geom_circle(aes(x0 = 3.8, y0 = -1.1, r = 0.5), col = "cyan3") +
theme(line = element_blank())
cao_msa$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
cao_pd$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
cao_dis$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
con.glia <- qread("/work/msa/02_data/conos/con_oligo_astro_opc.qs", nthreads = 10)
cao_msa <- qread("cao_oligo_astro_opc_msa.qs", nthreads = 10)
cao_pd <- qread("cao_oligo_astro_opc_pd.qs", nthreads = 10)
cao_dis <- qread("cao_oligo_astro_opc_dis.qs", nthreads = 10)
con.glia$plotGraph(groups = anno.glia,
plot.na = F,
size = 0.1,
palette = pal.major,
font.size = 3,
raster = T,
show.labels = T, embedding = "largeVis_CPCA_AS01",
alpha = 0.05) +
labs(x = "largeVis1", y = "largeVis2") +
theme(line = element_blank())
cm.merged <- con.glia$getJointCountMatrix()
markers <- c("AQP4","TNC","MOG","LINC01608","SLC5A11","SGCZ","VCAN","OLIG2")
dotPlot(markers,
cm.merged,
anno.glia,
cols = c("white","firebrick"),
gene.order = markers)
cao_msa$plotCellLoadings(show.pvals = F,
alpha = 0)$data %>%
ggplot(aes(ind, values, fill = ind)) +
geom_hline(yintercept = 0, col = "black") +
geom_violin() +
coord_flip() +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none",
line = element_blank()) +
scale_y_continuous(breaks = c(-1,0,1),
limits = c(-1,1),
labels = c("-1\nCTRL",0,"1\nMSA")) +
geom_vline(xintercept = 6.5, col = "red") +
labs(x = "", y = "")
## Warning: Removed 13 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
cao_pd$plotCellLoadings(show.pvals = F,
alpha = 0)$data %>%
ggplot(aes(ind, values, fill = ind)) +
geom_hline(yintercept = 0, col = "black") +
geom_violin() +
coord_flip() +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none",
line = element_blank()) +
scale_y_continuous(breaks = c(-1,0,1),
limits = c(-1,1),
labels = c("-1\nCTRL",0,"1\nPD")) +
geom_vline(xintercept = 4.5, col = "red") +
labs(x = "", y = "")
## Warning: Removed 59 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
cao_dis$plotCellLoadings(show.pvals = F,
alpha = 0)$data %>%
ggplot(aes(ind, values, fill = ind)) +
geom_hline(yintercept = 0, col = "black") +
geom_violin() +
coord_flip() +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none",
line = element_blank()) +
scale_y_continuous(breaks = c(-1,0,1),
limits = c(-1,1),
labels = c("-1\nPD",0,"1\nMSA")) +
geom_vline(xintercept = 6.5, col = "red") +
labs(x = "", y = "")
## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
cao_msa$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")
cao_pd$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")
cao_dis$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")
cao_msa <- qread("cao_astro_msa.qs", nthreads = 10)
cao_pd <- qread("cao_astro_pd.qs", nthreads = 10)
cao_dis <- qread("cao_astro_dis.qs", nthreads = 10)
df.all <- Map(getOntWithFamily, list(cao_msa, cao_pd, cao_dis), c("CTRL vs. MSA", "CTRL vs. PD", "PD vs. MSA")) %>%
lget("df.all") %>%
bind_rows() %>%
mutate(Group = paste0("AS_", strsplit(Group, "_") %>% sget(1) %>% tolower()))
## Loading required package: DOSE
##
## DOSE v3.30.5 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
##
## If you use DOSE in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Guang-Rong Yan, Qing-Yu He. DOSE: an R/Bioconductor package for Disease Ontology Semantic and Enrichment analysis. Bioinformatics 2015, 31(4):608-609
# Select relevant pathways
p.ids <- c("GO:0050821", "GO:0050728", "GO:0006986", "GO:0030168", "GO:0030001", "GO:1904707", "GO:0140053", "GO:0006486", "GO:0010717", "GO:0031113", "GO:0032675", "GO:0061041", "GO:0071222", "GO:0045766", "GO:0006487", "GO:0097501", "GO:0042776", "GO:0045047", "GO:0044794", "GO:0035633", "GO:0097242", "GO:0006120", "GO:0042026", "GO:0006979", "GO:0071222", "GO:0017015", "GO:0009611", "GO:1901201", "GO:0042594", "GO:0097501", "GO:0006123", "GO:0006909", "GO:0007179", "GO:0006120", "GO:0097501")
ct <- "AS_homeostatic"
df.sel <- df.all %>%
filter(Group == ct)
p1 <- ggplot(df.sel, aes(NES, logp, col = comp, shape = comp)) +
geom_point(data = ~filter(., p.adjust > 0.05), size = 0.1, alpha = 0.5, col = "black") +
geom_point(data = ~filter(., p.adjust <= 0.05), size = 2, alpha = 0.8) +
theme_bw() +
geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
scale_color_manual(values = pal.major) +
theme(line = element_blank()) +
labs(y = "-log10(adj. p)", shape = "Comparison", col = "Comparison") +
dotSize(3)
df <- Map(getOntWithFamily, list(cao_msa, cao_pd, cao_dis), c("CTRL vs. MSA", "CTRL vs. PD", "PD vs. MSA")) %>%
lget("df") %>%
bind_rows() %>%
mutate(Group = paste0("AS_", strsplit(Group, "_") %>% sget(1) %>% tolower())) %>%
filter(Group == ct,
ID %in% p.ids)
p2 <- df %>%
filter(p.adjust <= 0.05, NES > 0) %>%
mutate(Group = factor(Group, levels = sort(unique(Group)))) %>%
select(Description, NES, Group, comp) %>%
as.data.frame() %>%
group_by(comp, Group) %>%
group_split() %>%
lapply(dplyr::slice, seq(5)) %>%
bind_rows() %>%
mutate(., x = 0, y = rev(seq(nrow(.)))) %>%
ggplot(aes(x, y, label = Description, col = comp)) +
geom_text(hjust = 0) +
theme_void() +
xlim(0, 1) +
labs(title = "Selected terms for up-regulated genes") +
scale_color_manual(values = pal.major) +
guides(col = "none")
p3 <- df %>%
filter(p.adjust <= 0.05, NES < 0) %>%
select(Description, NES, Group, comp) %>%
group_by(comp, Group) %>%
group_split() %>%
lapply(dplyr::slice, seq(5)) %>%
bind_rows() %>%
mutate(., x = 0, y = rev(seq(nrow(.)))) %>%
ggplot(aes(x, y, label = Description, col = comp)) +
geom_text(hjust = 0) +
theme_void() +
xlim(0, 1) +
labs(title = "Selected terms for down-regulated genes") +
scale_color_manual(values = pal.major) +
guides(col = "none")
plot_grid(plotlist = list(p3, p1, p2), ncol = 3, rel_widths = c(0.9, 1, 0.9))
ct <- "AS_reactive"
df.sel <- df.all %>%
filter(Group == ct)
p1 <- ggplot(df.sel, aes(NES, logp, col = comp, shape = comp)) +
geom_point(data = ~filter(., p.adjust > 0.05), size = 0.1, alpha = 0.5, col = "black") +
geom_point(data = ~filter(., p.adjust <= 0.05), size = 2, alpha = 0.8) +
theme_bw() +
geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
scale_color_manual(values = pal.major) +
theme(line = element_blank()) +
labs(y = "-log10(adj. p)", shape = "Comparison", col = "Comparison") +
dotSize(3)
df <- Map(getOntWithFamily, list(cao_msa, cao_pd, cao_dis), c("CTRL vs. MSA", "CTRL vs. PD", "PD vs. MSA")) %>%
lget("df") %>%
bind_rows() %>%
mutate(Group = paste0("AS_", strsplit(Group, "_") %>% sget(1) %>% tolower())) %>%
filter(Group == ct,
ID %in% p.ids)
p2 <- df %>%
filter(p.adjust <= 0.05, NES > 0) %>%
mutate(Group = factor(Group, levels = sort(unique(Group)))) %>%
select(Description, NES, Group, comp) %>%
group_by(comp, Group) %>%
group_split() %>%
lapply(dplyr::slice, seq(5)) %>%
bind_rows() %>%
mutate(., x = 0, y = rev(seq(nrow(.)))) %>%
ggplot(aes(x, y, label = Description, col = comp)) +
geom_text(hjust = 0) +
theme_void() +
xlim(0, 1) +
labs(title = "Selected terms for up-regulated genes") +
scale_color_manual(values = pal.major) +
guides(col = "none")
p3 <- df %>%
filter(p.adjust <= 0.05, NES < 0) %>%
select(Description, NES, Group, comp) %>%
group_by(comp, Group) %>%
group_split() %>%
lapply(dplyr::slice, seq(5)) %>%
bind_rows() %>%
mutate(., x = 0, y = rev(seq(nrow(.)))) %>%
ggplot(aes(x, y, label = Description, col = comp)) +
geom_text(hjust = 0) +
theme_void() +
xlim(0, 1) +
labs(title = "Selected terms for down-regulated genes") +
scale_color_manual(values = pal.major) +
guides(col = "none")
plot_grid(plotlist = list(p3, p1, p2), ncol = 3, rel_widths = c(0.9, 1, 0.9))
cao_msa <- qread("cao_oligo_msa.qs", nthreads = 10)
cao_pd <- qread("cao_oligo_pd.qs", nthreads = 10)
cao_dis <- qread("cao_oligo_dis.qs", nthreads = 10)
df.all <- Map(getOntWithFamily, list(cao_msa, cao_pd, cao_dis), c("CTRL vs. MSA", "CTRL vs. PD", "PD vs. MSA")) %>%
lget("df.all") %>%
bind_rows() %>%
mutate(Group = paste0("OL_", strsplit(Group, "_") %>% sget(2) %>% gsub("SCGZ", "SGCZ", .)))
# Select relevant pathways
p.ids <- c("GO:0071345", "GO:0045596", "GO:0060271", "GO:0048709", "GO:0006986", "GO:0016032", "GO:0060271", "GO:0042026", "GO:0006986", "GO:0120034", "GO:0071346", "GO:1904262", "GO:0042776", "GO:0061077", "GO:0050821", "GO:0045047", "GO:0042776", "GO:0050821", "GO:0045047", "GO:0007042", "GO:0042776", "GO:0061077", "GO:0045047", "GO:0042776", "GO:0045047", "GO:2000249", "GO:0002474", "GO:0042026", "GO:0002040", "GO:0046330", "GO:0043542", "GO:0030036", "GO:0032981", "GO:0007042")
ct <- "OL_SLC5A11"
df.sel <- df.all %>%
filter(Group == ct)
p1 <- ggplot(df.sel, aes(NES, logp, col = comp, shape = comp)) +
geom_point(data = ~filter(., p.adjust > 0.05), size = 0.1, alpha = 0.5, col = "black") +
geom_point(data = ~filter(., p.adjust <= 0.05), size = 2, alpha = 0.8) +
theme_bw() +
geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
scale_color_manual(values = pal.major) +
theme(line = element_blank()) +
labs(y = "-log10(adj. p)", shape = "Comparison", col = "Comparison") +
dotSize(3)
df <- Map(getOntWithFamily, list(cao_msa, cao_pd, cao_dis), c("CTRL vs. MSA", "CTRL vs. PD", "PD vs. MSA")) %>%
lget("df") %>%
bind_rows() %>%
mutate(Group = paste0("OL_", strsplit(Group, "_") %>% sget(2) %>% gsub("SCGZ", "SGCZ", .))) %>%
filter(Group == ct,
ID %in% p.ids)
p2 <- df %>%
filter(p.adjust <= 0.05, NES > 0) %>%
mutate(Group = factor(Group, levels = sort(unique(Group)))) %>%
select(Description, NES, Group, comp) %>%
group_by(comp, Group) %>%
group_split() %>%
lapply(dplyr::slice, seq(5)) %>%
bind_rows() %>%
mutate(., x = 0, y = rev(seq(nrow(.)))) %>%
ggplot(aes(x, y, label = Description, col = comp)) +
geom_text(hjust = 0) +
theme_void() +
xlim(0, 1) +
labs(title = "Selected terms for up-regulated genes") +
scale_color_manual(values = pal.major) +
guides(col = "none")
p3 <- df %>%
filter(p.adjust <= 0.05, NES < 0) %>%
select(Description, NES, Group, comp) %>%
group_by(comp, Group) %>%
group_split() %>%
lapply(dplyr::slice, seq(5)) %>%
bind_rows() %>%
mutate(., x = 0, y = rev(seq(nrow(.)))) %>%
ggplot(aes(x, y, label = Description, col = comp)) +
geom_text(hjust = 0) +
theme_void() +
xlim(0, 1) +
labs(title = "Selected terms for down-regulated genes") +
scale_color_manual(values = pal.major) +
guides(col = "none")
plot_grid(plotlist = list(p3, p1, p2), ncol = 3, rel_widths = c(0.9, 1, 0.9))
cm.merged <- con$getJointCountMatrix(raw = T) %>%
Matrix::t() %>%
.[, colnames(.) %in% names(anno.glia)]
# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[colnames(cm.merged)]
anno.subtype <- anno.glia %>%
.[!is.na(.)] %>%
factor()
idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())
anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>%
{`names<-`(as.character(.), names(.))}
anno.final <- paste0(anno.donor,"!!",anno.subtype) %>%
`names<-`(anno.donor %>% names())
# Create pseudo CM
cm.pseudo <- sccore::collapseCellsByType(cm.merged %>% Matrix::t(),
groups = anno.final, min.cell.count = 1) %>%
t() %>%
apply(2, as, "integer")
cm.pseudo %<>%
{. + 1} %>%
DESeq2::DESeqDataSetFromMatrix(.,
colnames(.) %>%
strsplit("_") %>%
sget(1) %>%
data.frame() %>%
`dimnames<-`(list(colnames(cm.pseudo), "group")),
design = ~ group) %>%
DESeq2::estimateSizeFactors() %>%
DESeq2::counts(normalized = T)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# For DE between visits
genes <- "TLR1"
idx <- cm.pseudo %>%
colnames() %>%
data.frame(id = .) %>%
mutate(condition = strsplit(id, "_|!!") %>% sget(1),
ct = strsplit(id, "!!") %>% sget(2)) %>%
mutate(ord = order(condition, ct))
x <- cm.pseudo %>%
.[match(genes, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
.[idx$ord]
plot.dat <- x %>%
{data.frame(sample = names(.),
value = unname(.))} %>%
mutate(anno = strsplit(sample, "!!") %>%
sget(2),
condition = strsplit(sample, "!!|_") %>%
sget(1)) %>%
filter(anno %in% c("OL_LINC01608", "OL_SGCZ", "OL_SLC5A11")) %>%
mutate(anno = factor(anno))
stat.test <- plot.dat %>%
group_by(anno) %>%
rstatix::wilcox_test(value ~ condition) %>%
rstatix::add_xy_position(x = "anno", step.increase = 0.1, fun = "mean_sd", scales = "free")
plot.dat %>%
ggplot(aes(anno, value)) +
geom_boxplot(aes(fill = condition)) +
theme_bw() +
theme(line = element_blank(),
axis.ticks.x = element_line(),
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
labs(x = "", y = "Normalized pseudobulk expression", fill = "", title = "TLR1 expression") +
scale_fill_manual(values = ggsci::pal_jama()(3)) +
stat_compare_means(aes(fill = condition), method = "kruskal", label = "p.signif", label.y = 8.5e2) +
stat_pvalue_manual(stat.test)
## Warning in stat_compare_means(aes(fill = condition), method = "kruskal", :
## Ignoring unknown aesthetics: fill
# For DE between visits
genes <- "LRP1"
idx <- cm.pseudo %>%
colnames() %>%
data.frame(id = .) %>%
mutate(condition = strsplit(id, "_|!!") %>% sget(1),
ct = strsplit(id, "!!") %>% sget(2)) %>%
mutate(ord = order(condition, ct))
x <- cm.pseudo %>%
.[match(genes, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
.[idx$ord]
plot.dat <- x %>%
{data.frame(sample = names(.),
value = unname(.))} %>%
mutate(anno = strsplit(sample, "!!") %>%
sget(2),
condition = strsplit(sample, "!!|_") %>%
sget(1)) %>%
filter(anno %in% c("OL_LINC01608", "OL_SGCZ", "OL_SLC5A11")) %>%
mutate(anno = factor(anno))
stat.test <- plot.dat %>%
group_by(anno) %>%
rstatix::wilcox_test(value ~ condition) %>%
rstatix::add_xy_position(x = "anno", step.increase = 0.1)
plot.dat %>%
ggplot(aes(anno, value)) +
geom_boxplot(aes(fill = condition)) +
theme_bw() +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
axis.ticks.x = element_line()) +
labs(x = "", y = "Normalized pseudobulk expression", fill = "", title = "LRP1 expression") +
scale_fill_manual(values = ggsci::pal_jama()(3)) +
stat_compare_means(aes(fill = condition), method = "kruskal", label = "p.signif", label.y = 175) +
stat_pvalue_manual(stat.test) +
ylim(c(0, 180))
## Warning in stat_compare_means(aes(fill = condition), method = "kruskal", :
## Ignoring unknown aesthetics: fill
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bracket()`).
For calculation of data, see Liana.ipynb.
dat <- read.delim("liana_res.csv",
sep = ",",
header = T) %>%
mutate(group = strsplit(sample, "_") %>%
sget(1))
dat.plot <- dat %>%
dplyr::rename(ligand = ligand_complex,
receptor = receptor_complex,
score = lrscore) %>%
filter(target == "Oligodendrocytes",
ligand %in% c("BMP1", "SERPINE2", "PSAP", "APOE", "APP", "SERPING1"),
receptor %in% c("LRP1", "BMPR1A", "LRP4")) %>%
group_by(group, ligand, receptor, source, target) %>%
summarize(score = mean(score)) %>%
ungroup() %>%
arrange(group, source, target, ligand, receptor)
## `summarise()` has grouped output by 'group', 'ligand', 'receptor', 'source'.
## You can override using the `.groups` argument.
dat.msa <- dat.plot %>%
filter(group == "MSA",
score > 0.39) %>%
mutate(lrst = paste0(ligand, receptor, source, target))
dat.pd <- dat.plot %>%
mutate(lrst = paste0(ligand, receptor, source, target)) %>%
filter(group == "PD",
lrst %in% dat.msa$lrst)
dat.rel <- dat.msa %>%
mutate(score = score - dat.pd$score)
dat.rel %>%
lianaCircos()
cao <- qread("cao_micro_pvm.qs", nthreads = 10)
cao_msa <- qread("cao_micro_pvm_msa.qs", nthreads = 10)
cao_pd <- qread("cao_micro_pvm_pd.qs", nthreads = 10)
cao_dis <- qread("cao_micro_pvm_dis.qs", nthreads = 10)
sample.groups <- cao$sample.groups
cao$plotEmbedding(groups = anno.micro,
size = 0.5,
palette = pal.major,
font.size = 3,
raster = T,
mark.groups = T,
plot.na = F,
alpha = 0.2) +
labs(x="UMAP1", y= "UMAP2") +
theme(line = element_blank())
cm.merged <- cao$data.object$getJointCountMatrix()
markers <- c("AIF1","F13A1","MRC1","CD163","CD74")
dotPlot(markers,
cm.merged,
cao$cell.groups,
cols = c("white","firebrick"))
anno.sort <- anno.micro[rownames(cao$data.object$embedding)]
anno.sel <- anno.sort[!anno.sort %in% c("MIC_intermediate1", "PVMs")] %>% factor()
anno.sel2 <- anno.sort[!anno.sort %in% c("MIC_intermediate2", "MIC_activated")] %>% factor()
ldata1 <- getTscanTrajectory(cao$data.object, anno.sel)
ldata2 <- getTscanTrajectory(cao$data.object, anno.sel2)
ldata <- rbind(ldata1, ldata2)
cao$data.object$embedding %>%
as.data.frame() %>%
mutate(., unname(anno.micro[rownames(.)])) %>%
setNames(c("UMAP1", "UMAP2", "annotation")) %>%
ggplot() +
geom_point(aes(UMAP1, UMAP2, col = annotation), size = 0.3) +
geom_line(data = ldata, mapping=aes(UMAP1, UMAP2, group = edge), linewidth = 1) +
theme_bw() +
theme(legend.position = "right",
line = element_blank()) +
labs(col = "", x = "UMAP1", y = "UMAP2") +
scale_colour_manual(values = pal.major) +
dotSize(3)
Prepare data
emb <- cao$data.object$embedding %>%
`colnames<-`(c("UMAP1","UMAP2")) %>%
.[rownames(.) %in% names(anno.sel2), ]
sds_obj <- slingshot(emb,
anno.sel2,
start.clus = "MIC_steady-state",
stretch = 0
)
sds <- as.SlingshotDataSet(sds_obj)
pseudotime <- sds_obj@assays@data@listData$pseudotime[, 1]
Here, we show the code for running the generalized additive mixed
model. It takes a significant amount of time to run, we provide data in
pseudotime_micro_l2.qs.
mat <- cao$data.object$getJointCountMatrix() %>%
.[match(names(sds_obj), rownames(.)), colSums(.) > 2e2] %>%
.[, !grepl(pattern = "RPL|RPS|MT-", colnames(.))]
mt <- mat %>%
as.matrix() %>%
as.data.frame() %>%
tibble::rownames_to_column() %>%
{message("Mutating"); mutate(.,
pseudotime = pseudotime[rowname],
condition = cao$data.object$getDatasetPerCell()[rowname] %>%
as.character() %>%
strsplit("_") %>%
sget(1),
sample = strsplit(rowname, "!!") %>%
sget(1))} %>%
.[complete.cases(.), ] %>%
{message("Melting"); reshape2::melt(., id.vars = c("rowname", "pseudotime", "condition", "sample"))} %$%
{message("Splitting"); split(., variable)}
library(gamm4)
res <- mt %>%
sccore::plapply(\(x) {
fit_full <- gamm4(data = x, formula = value ~ s(pseudotime, by = factor(condition)), random = ~(1 | sample), REML = F)
fit_reduced <- gamm4(data = x, formula = value ~ s(pseudotime), random = ~(1 | sample), REML = F)
fit_none <- gamm4(data = x, formula = value ~ 1, random = ~(1 | sample), REML = F)
ann <- anova(fit_full$mer, fit_reduced$mer, fit_none$mer)
if (any(ann$`Pr(>Chisq)` <= 0.05, na.rm = T)) {
residuals <- predict(fit_full$gam, se.fit = T)$fit %>%
unname()
r.sq <- c(summary(fit_full$gam)$r.sq, summary(fit_reduced$gam)$r.sq) %>%
setNames(c("full", "reduced"))
out <- list(anova = ann,
residuals = residuals,
r.sq = r.sq)
return(out)
}
}, n.cores = 40, mc.preschedule = T, mc.cleanup = T, progress = F) %>%
.[!sapply(., is.null)]
qsave(res, "pseudotime_micro_l2.qs")
cao$data.object$embedding %>%
as.data.frame() %>%
mutate(., unname(anno.micro[rownames(.)])) %>%
setNames(c("UMAP1", "UMAP2", "annotation")) %>%
mutate(., pseudotime = pseudotime[rownames(.)]) %>%
ggplot() +
geom_point(aes(UMAP1, UMAP2, col = pseudotime), size = 0.3) +
geom_line(data = ldata2, aes(UMAP1, UMAP2, group = edge), size = 1) +
theme_bw() +
theme(line = element_blank()) +
scale_color_gradient(low = "navyblue", high = "orange") +
labs(col = "Pseudotime", x = "UMAP1", y = "UMAP2")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Please note, row order may change per iteration.
res <- qread("pseudotime_micro_l2.qs")
rsq.pseudo <- res %>%
sapply(\(gene) gene$r.sq[1]) %>%
sort(decreasing = T)
res.filter <- res[names(rsq.pseudo)[seq(50)] %>% strsplit(".", fixed = T) %>% sget(1)]
cm.merged <- cao$data.object$getJointCountMatrix() %>%
.[match(names(pseudotime), rownames(.)), colnames(.) %in% names(res.filter)] %>%
.[rowSums(.) > 0, ]
## Predict smoothend expression
pseudotime.both <- pseudotime[rownames(cm.merged)]
weights.both <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.both), rownames(.)), 1] + 1E-7
scFit <- cm.merged %>%
Matrix::t() %>%
tradeSeq::fitGAM(pseudotime = pseudotime.both, cellWeights = weights.both, verbose = T)
Smooth <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged), tidy = F, n=100)
# Average across replicates and scale
Smooth <- t(scale(t(Smooth)))
# Seriate the results
Smooth <- Smooth[ seriation::get_order(seriation::seriate(Smooth, method="PCA_angle")), ]
# Create heatmap
col_fun = circlize::colorRamp2(c(-4, 0, 4), c("navy", "white", "firebrick"))
Heatmap(Smooth,
col = col_fun,
cluster_columns=F,
cluster_rows=F,
show_column_names = F,
row_names_gp = grid::gpar(fontsize = 5))
We provide this figure here since all data are already loaded.
cpc <- pseudotime %>%
names() %>%
strsplit("_") %>%
sget(1) %>%
factor()
res[rownames(Smooth)] %>%
lget("residuals") %>%
lapply(as.data.frame) %>%
lapply(setNames, "residuals") %>%
lapply(mutate, group = cpc, pseudotime = pseudotime) %>%
data.table::rbindlist(idcol = "gene") %>%
ggplot(aes(pseudotime, residuals, col = group)) +
geom_smooth() +
theme_bw() +
facet_wrap(~gene, ncol = 5, scales = "free")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
cao_msa$plotCellLoadings(show.pvals = F,
alpha = 0)$data %>%
ggplot(aes(ind, values, fill = ind)) +
geom_hline(yintercept = 0, col = "black") +
geom_violin() +
coord_flip() +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none") +
scale_y_continuous(breaks = c(-1,0,1),
limits = c(-1,1),
labels = c("-1\nCTRL",0,"1\nMSA")) +
geom_vline(xintercept = 4.5, col = "red") +
labs(x = "", y = "") +
theme(line = element_blank())
cao_pd$plotCellLoadings(show.pvals = F,
alpha = 0)$data %>%
ggplot(aes(ind, values, fill = ind)) +
geom_hline(yintercept = 0, col = "black") +
geom_violin() +
coord_flip() +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none") +
scale_y_continuous(breaks = c(-1,0,1),
limits = c(-1,1),
labels = c("-1\nCTRL",0,"1\nPD")) +
geom_vline(xintercept = 4.5, col = "red") +
labs(x = "", y = "") +
theme(line = element_blank())
## Warning: Removed 273 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
cao_dis$plotCellLoadings(show.pvals = F,
alpha = 0)$data %>%
ggplot(aes(ind, values, fill = ind)) +
geom_hline(yintercept = 0, col = "black") +
geom_violin() +
coord_flip() +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none") +
scale_y_continuous(breaks = c(-1,0,1),
limits = c(-1,1),
labels = c("-1\nPD",0,"1\nMSA")) +
geom_vline(xintercept = 3.5, col = "red") +
labs(x = "", y = "") +
theme(line = element_blank())
## Warning: Removed 311 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
We provide combined data from the RNAscope experiments as a single
file res.qs.
res <- qread("RNAscope.qs") %>%
filter(target == "AIF1", Ch1NumSpots > 0) %>%
arrange(file)
area <- read.table("RNAscope_areas.tsv", sep = "\t", header = T) %>% # Million pixels
mutate(file = paste(File.name, Tissue, sep = "")) %>%
filter(file %in% res$file) %>%
arrange(file) %>%
mutate(rel_px = px.2 / 1E6)
p1 <- res %>%
group_by(group, target, file) %>%
summarize(spots = mean(Ch1NumSpots)) %>%
ggplot(aes(group, spots, fill = group)) +
geom_boxplot() +
geom_jitter(width = 0.2) +
theme_bw() +
labs(y = "Mean no. spots per double-positive cell", x = "") +
stat_compare_means(method = "kruskal.test", label.y = 115) +
stat_compare_means(comparisons = comp, method = "wilcox.test") +
theme(line = element_blank()) +
guides(fill = "none") +
scale_fill_manual(values = pal.major)
## `summarise()` has grouped output by 'group', 'target'. You can override using
## the `.groups` argument.
p2 <- res %>%
group_by(group, target, file) %>%
summarize(no_cells = n()) %>%
as.data.frame() %>%
arrange(file) %>%
mutate(rel_cells = no_cells / area$rel_px) %>%
ggplot(aes(group, rel_cells, fill = group)) +
geom_boxplot(outliers = F) +
geom_jitter(width = 0.2) +
theme_bw() +
stat_compare_means(method = "kruskal.test", label.y = 17) +
stat_compare_means(comparisons = comp, method = "wilcox.test") +
labs(y = "No. double-positive cells\nNormalized to area", x = "") +
theme(line = element_blank()) +
guides(fill = "none") +
scale_fill_manual(values = pal.major)
## `summarise()` has grouped output by 'group', 'target'. You can override using
## the `.groups` argument.
plot_grid(plotlist = list(p1, p2), ncol = 1)
fams <- cao_msa$test.results[["GSEA"]]$families
df.all <- list(cao_msa$.__enclos_env__$private$getOntologyPvalueResults(name = "GSEA", genes = "up", p.adj = 1, q.value = 1),
cao_msa$.__enclos_env__$private$getOntologyPvalueResults(name = "GSEA", genes = "down", p.adj = 1, q.value = 1)) %>%
bind_rows() %>%
mutate(logp = -log10(p.adjust))
# Relevant pathways
p.ids <- c("GO:0060760", "GO:0002230", "GO:0019915", "GO:1904646", "GO:0042775", "GO:0019646", "GO:0033108", "GO:0035455", "GO:0061077", "GO:0006914", "GO:0042026", "GO:0034340", "GO:0051607", "GO:0060337", "GO:0035455", "GO:0002684", "GO:0036037", "GO:0048514")
df <- cacoa:::getOntologyFamilyChildren(df.all, fams=fams, type = "GSEA", subtype = "BP") %>%
mutate(Group = paste0("MIC_", tolower(Group)) %>% gsub("MIC_pvms", "PVMs", .)) %>%
filter(ID %in% p.ids)
df.all %<>% mutate(Group = paste0("MIC_", tolower(Group)) %>% gsub("MIC_pvms", "PVMs", .))
p1 <- ggplot(df.all, aes(NES, logp, col = Group, shape = Group)) +
geom_point(data = ~filter(., p.adjust > 0.05), size = 0.1, alpha = 0.5, col = "black") +
geom_point(data = ~filter(., p.adjust <= 0.05), size = 2, alpha = 0.5) +
theme_bw() +
geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
scale_color_manual(values = pal.major) +
theme(line = element_blank()) +
labs(y = "-log10(adj. p)") +
dotSize(3)
p2 <- df %>%
filter(p.adjust <= 0.05, NES > 0) %>%
mutate(Group = factor(Group, levels = sort(unique(Group)))) %>%
select(Description, NES, Group) %$%
split(., Group) %>%
lapply(dplyr::slice, seq(5)) %>%
bind_rows() %>%
dplyr::slice(seq(25)) %>%
mutate(., x = 0, y = rev(seq(nrow(.)))) %>%
ggplot(aes(x, y, label = Description, col = Group)) +
geom_text(hjust = 0) +
theme_void() +
xlim(0, 1) +
labs(title = "Selected terms for up-regulated genes") +
scale_color_manual(values = pal.major) +
guides(col = "none")
p3 <- df %>%
filter(p.adjust <= 0.05, NES < 0) %>%
select(Description, NES, Group) %>%
dplyr::slice(seq(20)) %>%
mutate(., x = 0, y = rev(seq(nrow(.)))) %>%
ggplot(aes(x, y, label = Description, col = Group)) +
geom_text(hjust = 0) +
theme_void() +
xlim(0, 1) +
labs(title = "Selected terms for down-regulated genes") +
scale_color_manual(values = pal.major) +
guides(col = "none")
plot_grid(plotlist = list(p3, p1, p2), ncol = 3, rel_widths = c(0.9, 1, 0.9))
fams <- cao_pd$test.results[["GSEA"]]$families
df.all <- list(cao_pd$.__enclos_env__$private$getOntologyPvalueResults(name = "GSEA", genes = "up", p.adj = 1, q.value = 1),
cao_pd$.__enclos_env__$private$getOntologyPvalueResults(name = "GSEA", genes = "down", p.adj = 1, q.value = 1)) %>%
bind_rows() %>%
mutate(logp = -log10(p.adjust))
# Select relevant pathways
p.ids <- c("GO:0042776", "GO:0034341", "GO:0001916", "GO:0002503", "GO:0019885", "GO:0016042", "GO:0001909", "GO:0051085", "GO:0045824", "GO:0044000", "GO:0019885", "GO:0044242", "GO:0001944", "GO:0034976", "GO:0019646", "GO:0009205", "GO:0019883", "GO:0070972", "GO:0002474")
df <- cacoa:::getOntologyFamilyChildren(df.all, fams=fams, type = "GSEA", subtype = "BP") %>%
mutate(Group = paste0("MIC_", tolower(Group)) %>% gsub("MIC_pvms", "PVMs", .)) %>%
filter(ID %in% p.ids)
df.all %<>% mutate(Group = paste0("MIC_", tolower(Group)) %>% gsub("MIC_pvms", "PVMs", .))
p1 <- ggplot(df.all, aes(NES, logp, col = Group, shape = Group)) +
geom_point(data = ~filter(., p.adjust > 0.05), size = 0.1, alpha = 0.5, col = "black") +
geom_point(data = ~filter(., p.adjust <= 0.05), size = 2, alpha = 0.5) +
theme_bw() +
geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
scale_color_manual(values = pal.major) +
theme(line = element_blank()) +
labs(y = "-log10(adj. p)") +
dotSize(3)
p2 <- df %>%
filter(p.adjust <= 0.05, NES > 0) %>%
select(Description, NES, Group) %$%
split(., Group) %>%
lapply(dplyr::slice, seq(5)) %>%
bind_rows() %>%
dplyr::slice(seq(25)) %>%
mutate(., x = 0, y = rev(seq(nrow(.)))) %>%
ggplot(aes(x, y, label = Description, col = Group)) +
geom_text(hjust = 0) +
theme_void() +
xlim(0, 1) +
labs(title = "Selected terms for up-regulated genes") +
scale_color_manual(values = pal.major) +
guides(col = "none")
p3 <- df %>%
filter(p.adjust <= 0.05, NES < 0) %>%
select(Description, NES, Group) %$%
split(., Group) %>%
lapply(dplyr::slice, seq(5)) %>%
bind_rows() %>%
dplyr::slice(seq(25)) %>%
mutate(., x = 0, y = rev(seq(nrow(.)))) %>%
ggplot(aes(x, y, label = Description, col = Group)) +
geom_text(hjust = 0) +
theme_void() +
xlim(0, 1) +
labs(title = "Selected terms for down-regulated genes") +
scale_color_manual(values = pal.major) +
guides(col = "none")
plot_grid(plotlist = list(p3, p1, p2), ncol = 3, rel_widths = c(0.9, 1, 0.9))
Figures 6c,e,f where made with GraphPad Prism. Data are available in
Phagocytosis_assay_triplicates_raw_data_HH.tsv (Copenhagen
experiment) or HOUSTON.tsv (Houston experiment).
dat <- read.delim("Phagocytosis_assay_triplicates_raw_data_HH.tsv",
header = T, dec = ",") %>%
tidyr::pivot_longer(names_to = "group", cols = -1, values_to = "value") %>%
mutate(group = gsub(".1|.2", "", group) %>% factor(labels = c("PBS","LPS","MSA CSF","CTRL CSF","PD CSF"))) %>%
mutate(group = factor(group, levels = c("PBS","LPS","CTRL CSF","MSA CSF","PD CSF"))) %>%
group_by(Time, group) %>%
summarize(mean = mean(value, na.rm = T),
sd = sd(value, na.rm = T))
## `summarise()` has grouped output by 'Time'. You can override using the
## `.groups` argument.
dat %>%
ggplot(aes(Time, mean, group=group, color=group)) +
geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), width=.1) +
geom_point(size = 0.5) +
theme_bw() +
labs(x = "Time (hours)", y = "pHrodo-labelled E. coli uptake - intensity ratio [AU]", col = "Stimulation") +
scale_color_manual(values = pal.major) +
theme(line = element_blank())
dat.raw <- read.table("Cytokine_summary.tsv", header = TRUE, sep = "\t", dec = ",")
dat.tmp <- dat.raw %>%
melt(id.vars = c("Sample","Group","Condition")) %>%
mutate(variable = variable %>%
as.character() %>%
gsub(".", "-", ., fixed = T) %>%
as.factor()) %>%
filter(Condition == "Media", !Group %in% c("PBS","LPS")) %>%
filter(Condition == "Media") %>%
mutate(Group = Group %>% factor(levels = c("CTRL","MSA","PD")),
variable = variable %>% factor())
dat.stat <- dat.raw %>%
select(!IL.8) %>%
mutate(Group = factor(Group, levels = c("PBS","LPS","CTRL","MSA","PD"))) %>%
filter(!Group %in% c("PBS","LPS"))
res.il10 <- FSA::dunnTest(IL10 ~ Group, data = dat.stat %>% {`colnames<-`(., gsub(".", "", colnames(.), fixed = T))}, method = "bh")
## Registered S3 methods overwritten by 'FSA':
## method from
## confint.boot car
## hist.boot car
stat.test <- dat.tmp %>%
filter(variable == "IL-10") %>%
select(Group, value) %>%
as.data.frame() %>%
rstatix::wilcox_test(value ~ Group) %>%
mutate(p = res.il10$res$P.unadj,
p.adj = res.il10$res$P.adj %>% formatC(digits = 2),
p.adj.signif = c("*","ns","ns")) %>%
rstatix::add_xy_position(x = "Group")
dat.tmp %>%
filter(variable == "IL-10") %>%
ggplot(aes(Group, value)) +
geom_point(aes(col = Group), size = 3, position = position_jitter(width = 0.3)) +
theme_bw() +
theme(line = element_blank()) +
stat_pvalue_manual(stat.test, label = "p.adj", tip.length = 0.01, backet.nudge.y = 3, hide.ns = F) +
scale_color_manual(values = pal.major) +
labs(x = "", y = "IL-10 pg/mL") +
guides(col = "none")
dat.melt <- read.table("Microglia+CSF_samples_aSyn_sCD163.tsv", header = T, sep = "\t", dec = ",") %>%
melt(id.vars = c("diagnosis", "sex")) %>%
mutate(diagnosis = factor(diagnosis, levels = c("CTRL","MSA","IPD"), labels = c("CTRL","MSA","PD")))
dat.melt %>%
filter(variable == "sCD163_ng.mL_CSF") %>%
ggplot(aes(diagnosis, value)) +
geom_boxplot(aes(fill = diagnosis)) +
theme_bw() +
labs(x = "", y = "sCD163 ng/ml", fill = "Diagnosis") +
scale_fill_manual(values = pal.major) +
stat_compare_means(method = "t.test", comparisons = list(c("CTRL","MSA"), c("MSA","PD"), c("CTRL","PD"))) +
guides(fill = "none") +
theme(line = element_blank())
dat.melt %>%
filter(variable == "aSyn_pg.mL") %>%
ggplot(aes(diagnosis, value)) +
geom_boxplot(aes(fill = diagnosis)) +
theme_bw() +
labs(x = "", y = "aSyn pg/ml", fill = "Diagnosis") +
scale_fill_manual(values = pal.major) +
stat_compare_means(method = "t.test", comparisons = list(c("CTRL","MSA"), c("MSA","PD"), c("CTRL","PD"))) +
guides(fill = "none") +
theme(line = element_blank())
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_signif()`).
dat.melt %>%
filter(variable %in% c("aSyn_pg.mL","sCD163_ng.mL_CSF")) %>%
select(-sex) %>%
mutate(id = rep(seq(63), 2)) %>%
tidyr::pivot_wider(names_from = variable, values_from = value) %>%
ggplot(aes(aSyn_pg.mL, sCD163_ng.mL_CSF)) +
geom_point(aes(col = diagnosis)) +
theme_bw() +
labs(x = "aSyn pg/ml", y = "sCD163, ng/ml", col = "") +
geom_smooth(method = MASS::rlm, se = FALSE, color = "black") +
stat_poly_eq(aes(label = paste(after_stat(rr.label), after_stat(p.value.label), sep = "*\", \"*")), color = "black") +
scale_color_manual(values = pal.major) +
theme(line = element_blank())
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_poly_eq()`).
## Warning: Removed 36 rows containing missing values or values outside the scale range
## (`geom_point()`).
samples <- anno.major %>%
names() %>%
strsplit("!!") %>%
sget(1) %>%
unique()
crm <- qread("crm.qs",
nthreads = 10)
mtp <- crm$selectMetrics(ids = c(1:4,6,19))
mtp %>%
lapply(\(met) crm$plotSummaryMetrics(metrics = met, comp.group = "sample", second.comp.group = "group") +
scale_fill_manual(values = pal.major) +
labs(x = "") +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank())) %>%
plot_grid(plotlist = ., labels = letters[1:6], ncol = 2)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
crm$plotSummaryMetrics(metrics = crm$selectMetrics(ids = 20), comp.group = "sample", second.comp.group = "group") +
scale_fill_manual(values = pal.major) +
labs(x = "") +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.position = "right")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
crm$plotFilteredCells(doublet.method = "doubletdetection", depth.cutoff = 5e2, size = 0.2, alpha = 0.1) +
dotSize(3)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
crm$plotFilteredCells(type = "bar", doublet.method = "doubletdetection", depth.cutoff = 5e2)$data %>%
filter(sample != "MSA_1406") %>%
mutate(sample = strsplit(sample, "_") %>%
sget(1)) %$%
split(., sample) %>%
lapply(\(x) split(x, x$filter)) %>%
lapply(lapply, \(df) mutate(df, sample = paste0(sample, seq(nrow(df))))) %>%
lapply(bind_rows) %>%
bind_rows() %>%
mutate(sample = factor(sample, levels = c(paste0("CTRL", seq(10)), paste0("MSA", seq(7)), paste0("PD", seq(12))))) %>%
ggplot(aes(sample, pct, fill = filter)) +
geom_bar(stat = "identity") +
geom_text_repel(aes(label = sprintf("%0.2f", round(pct, digits = 2))),
position = position_stack(vjust = 0.5),
direction = "y",
size = 2.5) +
crm$theme +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(x = "", y = "Percentage cells filtered") +
theme(legend.position = "bottom")
These figures cannot be reproduced here due to GDPR. Leaving the code for visibility.
crm <- qread("crm.qs", nthreads = 10)
crm$plotCbCells(samples = crm$metadata$sample %>% as.character() %>% .[. != "MSA_1406"]) +
scale_x_discrete(labels = c(paste("CTRL", seq(10)), paste("MSA", seq(7)), paste("PD", seq(12)))) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
crm$plotCbAmbGenes() +
theme(line = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
axis.ticks.x = element_line())
con$embedding <- con$embeddings$UMAP
sample.groups <- con$samples %>%
names() %>%
setNames(grepl.replace(., c("CTRL","MSA","PD")), .)
cao <- Cacoa$new(data.object = con,
sample.groups = ifelse(grepl("CTRL", sample.groups), "CTRL", "DISEASE") %>%
setNames(names(sample.groups)),
cell.groups = anno.major,
ref.level = "CTRL",
target.level = "DISEASE",
n.cores = 32)
meta <- read.delim("metadata.tsv", sep = "\t") %>%
filter(sample %in% names(con$samples)) %>%
tibble::column_to_rownames("sample")
cao$estimateExpressionShiftMagnitudes()
## Filtering data...
## Excluding cell types Exc. neurons that don't have enough samples
## done!
## Calculating pairwise distances using dist='cor'...
## Done!
cao$estimateMetadataSeparation(sample.meta = meta,
space = "expression.shifts")
cao$plotSampleDistances(space = "expression.shifts", show.sample.size = T, method = "UMAP")$data %>%
mutate(condition = unname(sample.groups)) %>%
ggplot(aes(x, y, shape = condition, col = condition)) +
geom_point(aes(size = n.cells)) + scale_size_continuous(trans = "log10",
range = c(5 * 0.5, 5 * 1.5), name = "Num. cells") +
theme_bw() +
scale_color_manual(values = pal.major) +
theme(axis.text = element_blank(),
axis.ticks = element_blank()) +
labs(x = "", y = "", title = "Condition", shape = "Condition", col = "Condition") +
dotSize(3)
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
cao$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$age %>% setNames(rownames(meta)))$data %>%
mutate(condition = unname(sample.groups)) %>%
ggplot(aes(x, y, shape = condition, col = color)) +
geom_point(size = 3) +
theme_bw() +
theme(axis.text = element_blank(),
axis.ticks = element_blank()) +
labs(x = "", y = "", title = "Age", color = "Age (y)", shape = "Condition")
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
cao$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$brain_bank %>% setNames(rownames(meta)))$data %>%
mutate(condition = unname(sample.groups)) %>%
ggplot(aes(x, y, shape = condition, col = color)) +
geom_point(size = 3) +
theme_bw() +
theme(axis.text = element_blank(),
axis.ticks = element_blank()) +
labs(x = "", y = "", title = "Brain bank", shape = "Condition", color = "Brain bank")
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
cao$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$extract %>% setNames(rownames(meta)))$data %>%
mutate(condition = unname(sample.groups)) %>%
ggplot(aes(x, y, shape = condition, col = color)) +
geom_point(size = 3) +
theme_bw() +
theme(axis.text = element_blank(),
axis.ticks = element_blank()) +
labs(x = "", y = "", title = "Extract", shape = "Condition", color = "Extract")
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
cao$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$flowcell %>% setNames(rownames(meta)))$data %>%
mutate(condition = unname(sample.groups)) %>%
ggplot(aes(x, y, shape = condition, col = color)) +
geom_point(size = 3) +
theme_bw() +
theme(axis.text = element_blank(),
axis.ticks = element_blank()) +
labs(x = "", y = "", title = "Flowcell", shape = "Condition", color = "Flowcell")
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
cm.merged <- con$getJointCountMatrix(raw = T) %>%
Matrix::t() %>%
.[, colnames(.) %in% names(anno.major)]
# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[colnames(cm.merged)]
anno.subtype <- anno.major %>%
.[!is.na(.)] %>%
factor()
idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())
anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>%
{`names<-`(as.character(.), names(.))}
anno.final <- paste0(anno.donor,"!!",anno.subtype) %>%
`names<-`(anno.donor %>% names())
# Create pseudo CM
cm.pseudo <- sccore::collapseCellsByType(cm.merged %>% Matrix::t(),
groups = anno.final, min.cell.count = 1) %>%
t() %>%
apply(2, as, "integer")
cm.pseudo %<>%
{. + 1} %>%
DESeq2::DESeqDataSetFromMatrix(.,
colnames(.) %>%
strsplit("_") %>%
sget(1) %>%
data.frame() %>%
`dimnames<-`(list(colnames(cm.pseudo), "group")),
design = ~ group) %>%
DESeq2::estimateSizeFactors() %>%
DESeq2::counts(normalized = T)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
c("TSPO", "COQ2", "MAPT", "FBXO47", "ELOVL7", "EDN1", "GAB1", "TENM2", "RABGEF1", "PLA2G4C", "INPP4B", "ZIC1", "ZIC2", "ZIC3", "ZIC4", "SNCA", "TPPP", "TPPP") %>%
{Map(\(gene, leg) plotGenePseudoBulk(gene, cm.pseudo, leg), gene = ., leg = c(rep(F, 17), T))} %>%
cowplot::plot_grid(plotlist = ., ncol = 3)
## Warning in stat_compare_means(aes(fill = condition), method = "kruskal", : Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
cao <- qread("cao_micro_pvm.qs", nthreads = 10)
anno.sort <- anno.micro[rownames(cao$data.object$embedding)]
anno.sel <- anno.sort[!anno.sort %in% c("MIC_intermediate1", "PVMs")] %>% factor()
emb <- cao$data.object$embedding %>%
`colnames<-`(c("UMAP1","UMAP2")) %>%
.[rownames(.) %in% names(anno.sel), ]
sds_obj <- slingshot(emb,
anno.sel,
start.clus = "MIC_steady-state",
stretch = 0
)
sds <- as.SlingshotDataSet(sds_obj)
pseudotime <- sds_obj@assays@data@listData$pseudotime[, 1]
plot.df <- cao$data.object$embedding %>%
as.data.frame() %>%
.[rownames(.) %in% names(sds_obj), ] %>%
`colnames<-`(c("UMAP1","UMAP2")) %>%
mutate(., annotation = anno.micro[rownames(.)] %>% as.factor()) %>%
.[complete.cases(.),]
ldata <- getTscanTrajectory(cao$data.object, anno.sel)
cao$data.object$embedding %>%
as.data.frame() %>%
mutate(., unname(anno.micro[rownames(.)])) %>%
setNames(c("UMAP1", "UMAP2", "annotation")) %>%
mutate(., pseudotime = pseudotime[rownames(.)]) %>%
ggplot() +
geom_point(aes(UMAP1, UMAP2, col = pseudotime), size = 0.3) +
geom_line(data = ldata1, aes(UMAP1, UMAP2, group = edge), size = 1) +
theme_bw() +
theme(line = element_blank()) +
scale_color_gradient(low = "navyblue", high = "orange") +
labs(col = "Pseudotime", x = "UMAP1", y = "UMAP2")# +
Here, we show the code for running the generalized additive mixed
model. It takes a significant amount of time to run, we provide data in
pseudotime_micro_l1.qs.
mat <- cao$data.object$getJointCountMatrix() %>%
.[match(names(sds_obj), rownames(.)), colSums(.) > 2e2] %>%
.[, !grepl(pattern = "RPL|RPS|MT-", colnames(.))]
mt <- mat %>%
as.matrix() %>%
as.data.frame() %>%
tibble::rownames_to_column() %>%
{message("Mutating"); mutate(.,
pseudotime = pseudotime[rowname],
condition = cao$data.object$getDatasetPerCell()[rowname] %>%
as.character() %>%
strsplit("_") %>%
sget(1),
sample = strsplit(rowname, "!!") %>%
sget(1))} %>%
.[complete.cases(.), ] %>%
{message("Melting"); reshape2::melt(., id.vars = c("rowname", "pseudotime", "condition", "sample"))} %$%
{message("Splitting"); split(., variable)}
library(gamm4)
res <- mt %>%
sccore::plapply(\(x) {
fit_full <- gamm4(data = x, formula = value ~ s(pseudotime, by = factor(condition)), random = ~(1 | sample), REML = F)
fit_reduced <- gamm4(data = x, formula = value ~ s(pseudotime), random = ~(1 | sample), REML = F)
fit_none <- gamm4(data = x, formula = value ~ 1, random = ~(1 | sample), REML = F)
ann <- anova(fit_full$mer, fit_reduced$mer, fit_none$mer)
if (any(ann$`Pr(>Chisq)` <= 0.05, na.rm = T)) {
residuals <- predict(fit_full$gam, se.fit = T)$fit %>%
unname()
r.sq <- c(summary(fit_full$gam)$r.sq, summary(fit_reduced$gam)$r.sq) %>%
setNames(c("full", "reduced"))
out <- list(anova = ann,
residuals = residuals,
r.sq = r.sq)
return(out)
}
}, n.cores = 40, mc.preschedule = T, mc.cleanup = T, progress = F) %>%
.[!sapply(., is.null)]
qsave(res, "pseudotime_micro_l1.qs")
res <- qread("pseudotime_micro_l1.qs")
rsq.pseudo <- res %>%
sapply(\(gene) gene$r.sq[1]) %>%
sort(decreasing = T)
res.filter <- res[names(rsq.pseudo)[seq(50)] %>% strsplit(".", fixed = T) %>% sget(1)]
cm.merged <- cao$data.object$getJointCountMatrix() %>%
.[match(names(pseudotime), rownames(.)), colnames(.) %in% names(res.filter)] %>%
.[rowSums(.) > 0, ]
## Predict smoothend expression
pseudotime.both <- pseudotime[rownames(cm.merged)]
weights.both <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.both), rownames(.)), 1] + 1E-7
scFit <- cm.merged %>%
Matrix::t() %>%
tradeSeq::fitGAM(pseudotime = pseudotime.both, cellWeights = weights.both, verbose = T)
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
Smooth <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged), tidy = F, n=100)
# Average across replicates and scale
Smooth <- t(scale(t(Smooth)))
# Seriate the results
Smooth <- Smooth[ seriation::get_order(seriation::seriate(Smooth, method="PCA_angle")), ]
# Create heatmap
col_fun = circlize::colorRamp2(c(-4, 0, 4), c("navy", "white", "firebrick"))
Heatmap(Smooth,
col = col_fun,
cluster_columns=F,
cluster_rows=F,
show_column_names = F,
row_names_gp = grid::gpar(fontsize = 5))
cpc <- pseudotime %>%
names() %>%
strsplit("_") %>%
sget(1) %>%
factor()
res.filter %>%
lget("residuals") %>%
lapply(as.data.frame) %>%
lapply(setNames, "residuals") %>%
lapply(mutate, group = cpc, pseudotime = pseudotime) %>%
data.table::rbindlist(idcol = "gene") %>%
ggplot(aes(pseudotime, residuals, col = group)) +
geom_smooth() +
theme_bw() +
facet_wrap(~gene, ncol = 5, scales = "free")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
We provide the results output from scHLAcount in
scHLAcount.zip.
samples <- dir("scHLAcount/rydbirk/") %>% .[grepl(pattern = "_", .)]
cms <- lapply(samples, function(x) {
labels <- read.delim(paste0("scHLAcount/rydbirk/",x,"/labels.tsv"), header = F) %>% .[,1]
barcodes <- read.delim(paste0("scHLAcount/rydbirk/",x,"/barcodes.tsv"), header = F) %>%
.[,1] %>%
sapply(function(y) paste0(x,"one_",y))
mtx <- Matrix::readMM(paste0("scHLAcount/rydbirk/",x,"/count_matrix.mtx")) %>%
t() %>%
`dimnames<-`(list(barcodes, labels)) %>%
.[rowSums(.) > 0,] # Remove cells with no counts
tmp <- colnames(mtx) %>%
strsplit("*", T) %>%
sapply(`[[`, 1)
# If more than one allele per HLA type has been detected, sum per HLA type
if(any(tmp %>% table() %>% as.numeric() > 1)) {
cs <- colSums(mtx)
alleles <- unique(tmp)
mtx <- lapply(alleles, function(y) mtx[,tmp == y]) %>%
lapply(function(y) {
if(class(y) == "dgTMatrix") rowSums(y) else y
}) %>%
unlist() %>%
Matrix(nrow = nrow(mtx)) %>%
`rownames<-`(rownames(mtx))
}
colnames(mtx) <- unique(tmp)
return(mtx)
}) %>%
setNames(samples)
Calculations
# Cell-wise
cell.counts <- sapply(cms, rowSums) %>%
unname() %>%
unlist() %>%
`names<-`(., gsub("one_","!!",names(.)))
depth <- getConosDepth(con) %>%
.[match(names(cell.counts), names(.))] %>%
{data.frame(cell = names(.),
sample = names(.) %>% strsplit("!!", T) %>% sapply(`[[`, 1),
depth = unname(.))}
depth[is.na(depth)] <- 0
cell.group <- grepl.replace(names(cell.counts), patterns = c("CTRL","PD","MSA")) %>% as.factor()
mhc1.raw <- sapply(cms, function(x) {
tmp <- x[,colnames(x) %in% c("A","B","C")]
if(class(tmp) != "numeric") rowSums(tmp) else tmp
}) %>%
unname() %>%
unlist() %>%
`names<-`(., gsub("one_","!!",names(.)))
mhc2.raw <- sapply(cms, function(x) {
tmp <- x[,!colnames(x) %in% c("A","B","C")]
if(class(tmp) != "numeric") rowSums(tmp) else tmp
}) %>%
unname() %>%
unlist() %>%
`names<-`(., gsub("one_","!!",names(.)))
cell.df <- data.frame(group = cell.group,
counts = cell.counts,
mhc1 = mhc1.raw,
mhc2 = mhc2.raw,
depth = depth$depth) %>%
mutate(., anno = anno.major[match(rownames(.), names(anno.major))],
sample = rownames(.) %>% sapply(strsplit, "!!", T) %>% sapply(`[[`, 1)) %>%
mutate(type = grepl.replace(anno %>% as.character(), levels(anno), c("Brain","Brain","Brain","Peripheral","Peripheral","Peripheral","Brain","Brain","Brain","Brain"))) %>%
`rownames<-`(names(cell.counts)) %>%
filter(!is.na(anno))
Load Smajic data
samples <- dir("scHLAcount/smajic") %>% .[grepl(pattern = "SRR", .)]
cms <- lapply(samples, function(x) {
labels <- read.delim(paste0("scHLAcount/smajic/",x,"/labels.tsv"), header = F) %>% .[,1]
barcodes <- read.delim(paste0("scHLAcount/smajic/",x,"/barcodes.tsv"), header = F) %>%
.[,1] %>%
sapply(function(y) paste0(x,"_",y,"_1"))
mtx <- Matrix::readMM(paste0("scHLAcount/smajic/",x,"/count_matrix.mtx")) %>%
t() %>%
`dimnames<-`(list(barcodes, labels)) %>%
.[rowSums(.) > 0,] # Remove cells with no counts
tmp <- colnames(mtx) %>%
strsplit("*", T) %>%
sapply(`[[`, 1)
# If more than one allele per HLA type has been detected, sum per HLA type
if(any(tmp %>% table() %>% as.numeric() > 1)) {
cs <- colSums(mtx)
alleles <- unique(tmp)
mtx <- lapply(alleles, function(y) mtx[,tmp == y]) %>%
lapply(function(y) {
if(class(y) == "dgTMatrix") rowSums(y) else y
}) %>%
unlist() %>%
Matrix(nrow = nrow(mtx)) %>%
`rownames<-`(rownames(mtx))
}
colnames(mtx) <- unique(tmp)
return(mtx)
}) %>%
setNames(samples)
# Correct naming
name.df <- data.frame(samples = samples, ids = c(rep("PD",6) %>% mapply(function(x,y) paste0(x,y), x = ., y = c(1,1,2:5)),
rep("CTRL",6) %>% mapply(function(x,y) paste0(x,y), x = ., y = 1:6)))
anno <- readRDS(paste0("scHLAcount/smajic/anno.sma.rds")) %>%
setNames(., names(.) %>%
strsplit("_", T) %>%
lapply(function(x) {
if(grepl("C", x[[1]])) x[[1]] %<>% gsub("C","CTRL", .)
return(x)
}) %>%
sapply(function(x) paste0(x[[1]],"_",x[[2]])))
cms <- 1:12 %>%
lapply(function(x) {
rnames <- cms[[x]] %>%
rownames() %>%
names() %>%
sapply(function(y) paste0(name.df[x,2],"_",y)) %>%
unname()
rownames(cms[[x]]) <- rnames
return(cms[[x]])
}) %>%
setNames(names(cms))
Calculations
cell.df.rydbirk <- cell.df
# Cell-wise
cell.counts <- sapply(cms, rowSums) %>%
unname() %>%
unlist()
rem <- cell.counts %>%
names() %>%
table() %>%
.[. > 1] %>%
names()
cell.counts %<>% .[!names(.) %in% rem]
cell.group <- grepl.replace(names(cell.counts), patterns = c("CTRL","PD")) %>% as.factor()
mhc1.raw <- sapply(cms, function(x) {
tmp <- x[,colnames(x) %in% c("A","B","C")]
if(class(tmp) != "numeric") rowSums(tmp) else tmp
}) %>%
unname() %>%
unlist() %>%
.[!names(.) %in% rem]
mhc2.raw <- sapply(cms, function(x) {
tmp <- x[,!colnames(x) %in% c("A","B","C")]
if(class(tmp) != "numeric") rowSums(tmp) else tmp
}) %>%
unname() %>%
unlist() %>%
.[!names(.) %in% rem]
cell.df <- data.frame(group = cell.group,
counts = cell.counts,
mhc1 = mhc1.raw,
mhc2 = mhc2.raw) %>%
mutate(., anno = anno[match(rownames(.), names(anno))],
sample = rownames(.) %>% sapply(strsplit, "_", T) %>% sapply(`[[`, 1)) %>%
`rownames<-`(names(cell.counts)) %>%
filter(!is.na(anno))
Integrate with our data and plot
cell.df.int <- rbind(cell.df.rydbirk %>%
select(-c("depth","type")) %>%
mutate(origin = "Rydbirk"),
cell.df %>%
mutate(anno = anno %>% factor(labels = c("Astrocytes","Exc. neurons","Inh. neurons","Endothelial","Eppendymal","Exc. neurons","Inh. neurons","Inh. neurons","Microglia","Oligodendrocytes","OPCs","Pericytes")),
origin = "Smajic")) %>%
filter(! anno %in% c("Blood_immune","Endothelial","Eppendymal","Mural","Pericytes","Pericytes/endothelial","Immune")) %>%
filter(!is.na(anno)) %>%
mutate(anno = anno %>% factor(),
origin = origin %>% factor(),
anno = anno %>% unname() %>% factor(),
sample = sample %>% unname())
cell.anno.df <-
cell.df.int %>%
group_by(anno, sample, group, origin) %>%
summarise(mhc1 = mean(mhc1),
mhc2 = mean(mhc2),
counts = mean(counts),
cells = length(sample)) %>%
as.data.frame() %>%
select(-cells, -counts, -sample)
## `summarise()` has grouped output by 'anno', 'sample', 'group'. You can override
## using the `.groups` argument.
p.text1 <- data.frame(signif = c("n.s.", "n.s.", "n.s.", "n.s.", "n.s.", "0.040", "0.032", "n.s."),
mhc1 = 4.5,
anno = levels(cell.anno.df$anno))
p.text2 <- data.frame(signif = c("0.00057"),
mhc2 = 8.5,
anno = " Microglia")
cowplot::plot_grid(plotlist = list(
ggplot(cell.anno.df,
aes(anno,
mhc1)) +
geom_boxplot(outlier.shape = NA,
aes(fill = group)) +
geom_point(position = position_jitterdodge(jitter.width = 0.2),
aes(fill = group,
col = origin,
shape = group),
size = 2) +
scale_color_manual(values = c("black",
"grey50")) +
labs(x = "",
y = "Mean counts per sample",
title = "MHC-I counts",
fill = "",
col = "",
shape = " ") +
geom_text(data = p.text1, aes(label = signif)) +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none",
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
line = element_blank()),
ggplot(cell.anno.df %>% filter(anno == "Microglia") %>% mutate(anno = " Microglia"), aes(anno, mhc2)) +
geom_boxplot(outlier.shape = NA, aes(fill = group)) +
geom_point(position = position_jitterdodge(jitter.width = 0.2), aes(fill = group, col = origin, shape = group), size = 2) +
scale_color_manual(values = c("black","grey50")) +
labs(x = "", y = "", title = "MHC-II counts", fill = "", col = "", shape = " ") +
geom_text(data = p.text2, aes(label = signif)) +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
line = element_blank())
), ncol = 2, rel_widths = c(2,0.9))
Kruskal-Wallis, asses differences per cell type
cell.anno.df %>%
group_by(anno) %>%
kruskal_test(mhc1 ~ group) %>%
data.frame()
## anno .y. n statistic df p method
## 1 Astrocytes mhc1 38 5.4099627 2 0.0669 Kruskal-Wallis
## 2 Microglia mhc1 37 3.5900071 2 0.1660 Kruskal-Wallis
## 3 Oligodendrocytes mhc1 38 4.0883941 2 0.1290 Kruskal-Wallis
## 4 PVMs mhc1 24 3.5403947 2 0.1700 Kruskal-Wallis
## 5 OPCs mhc1 38 0.9335358 2 0.6270 Kruskal-Wallis
## 6 Inh. neurons mhc1 31 6.4335165 2 0.0401 Kruskal-Wallis
## 7 Exc. neurons mhc1 22 6.8860651 2 0.0320 Kruskal-Wallis
## 8 MSN mhc1 21 4.9345432 2 0.0848 Kruskal-Wallis
cell.anno.df %>%
filter(anno == "Microglia") %>%
group_by(anno) %>%
kruskal_test(mhc2 ~ group) %>%
data.frame()
## anno .y. n statistic df p method
## 1 Microglia mhc2 37 14.92493 2 0.000574 Kruskal-Wallis
Wilcoxon, assess differences between our data and Smajic per condition per cell type
cell.anno.df %>%
filter(group != "MSA",
!anno %in% c("MSN","PVMs")) %>%
group_by(anno, group) %>%
wilcox_test(mhc1 ~ origin) %>%
data.frame()
## anno group .y. group1 group2 n1 n2 statistic p
## 1 Astrocytes CTRL mhc1 Rydbirk Smajic 10 6 27 0.792
## 2 Astrocytes PD mhc1 Rydbirk Smajic 11 5 26 0.913
## 3 Microglia CTRL mhc1 Rydbirk Smajic 9 6 21 0.529
## 4 Microglia PD mhc1 Rydbirk Smajic 11 5 30 0.827
## 5 Oligodendrocytes CTRL mhc1 Rydbirk Smajic 10 6 34 0.713
## 6 Oligodendrocytes PD mhc1 Rydbirk Smajic 11 5 26 0.913
## 7 OPCs CTRL mhc1 Rydbirk Smajic 10 6 15 0.118
## 8 OPCs PD mhc1 Rydbirk Smajic 11 5 31 0.743
## 9 Inh. neurons CTRL mhc1 Rydbirk Smajic 8 6 10 0.080
## 10 Inh. neurons PD mhc1 Rydbirk Smajic 7 5 23 0.432
## 11 Exc. neurons CTRL mhc1 Rydbirk Smajic 6 6 10 0.240
## 12 Exc. neurons PD mhc1 Rydbirk Smajic 3 5 5 0.549
cell.anno.df %>%
filter(group != "MSA",
anno == "Microglia") %>%
group_by(anno, group) %>%
wilcox_test(mhc2 ~ origin) %>%
data.frame()
## anno group .y. group1 group2 n1 n2 statistic p
## 1 Microglia CTRL mhc2 Rydbirk Smajic 9 6 19 0.368
## 2 Microglia PD mhc2 Rydbirk Smajic 11 5 40 0.180
We provide a curated list with MHC and cytokine genes. Please note, the order of clusters may change.
dat <- read.table("MHC_cytokines_curated.tsv", header = T, sep = "\t")
cm.merged <- con$getJointCountMatrix(raw = T) %>%
Matrix::t() %>%
.[, colnames(.) %in% names(anno.major)] %>%
.[rownames(.) %in% (dat$name[dat$class == "cytokine"] %>% gsub("-","",.)), ] %>%
.[rowSums(.) > 0,]
# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[colnames(cm.merged)]
anno.subtype <- anno.major %>%
.[!is.na(.) & (. %in% c("Microglia", "PVMs"))] %>%
factor()
idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())
anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>%
{`names<-`(as.character(.), names(.))}
anno.final <- paste0(anno.donor,"!!",anno.subtype) %>%
`names<-`(anno.donor %>% names())
# Create pseudo CM
cm.pseudo <- sccore::collapseCellsByType(cm.merged %>% Matrix::t(),
groups = anno.final, min.cell.count = 1) %>%
t() %>%
apply(2, as, "integer")
cm.pseudo %<>%
{. + 1} %>%
DESeq2::DESeqDataSetFromMatrix(.,
colnames(.) %>%
strsplit("_") %>%
sget(1) %>%
data.frame() %>%
`dimnames<-`(list(colnames(cm.pseudo), "group")),
design = ~ group) %>%
DESeq2::estimateSizeFactors() %>%
DESeq2::counts(normalized = T)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
cm.pseudo %<>%
Matrix::t() %>%
scale() %>%
Matrix::t()
ord <- cm.pseudo %>%
colnames() %>%
{data.frame(id = .)} %>%
mutate(.,
ct = strsplit(id, "!!") %>%
sget(2),
condition = strsplit(id, "!!|_") %>%
sget(1)) %>%
arrange(ct, condition) %>%
pull(id)
cm.pseudo %<>%
.[, match(ord, colnames(.))]
cm.pseudo %<>%
as.data.frame() %>%
tibble::rownames_to_column(var = "gene") %>%
reshape2::melt(id.vars = c("gene")) %>%
mutate(variable = as.character(variable),
condition = strsplit(variable, "!!|_") %>%
sget(1),
ct = strsplit(variable, "!!") %>%
sget(2)) %>%
group_by(condition, ct, gene) %>%
summarize(m = mean(value)) %>%
ungroup() %>%
mutate(variable = paste(condition, ct, sep = "!!")) %>%
select(-condition, -ct) %>%
reshape2::dcast(gene ~ variable, value.var = "m") %>%
tibble::column_to_rownames(var = "gene")
## `summarise()` has grouped output by 'condition', 'ct'. You can override using
## the `.groups` argument.
tmp <- cm.pseudo %>%
colnames() %>%
strsplit("_|!!")
clusters <- tmp %>%
sget(2)
condition <- tmp %>%
sget(1)
ha <- ComplexHeatmap::HeatmapAnnotation(Annotation = clusters,
Condition = condition,
col = list(Annotation = c("Microglia" = unname(pal.major["Microglia"]),
"PVMs" = unname(pal.major["PVMs"])),
Condition = c("CTRL" = unname(pal.major["CTRL"]),
"MSA" = unname(pal.major["MSA"]),
"PD" = unname(pal.major["PD"]))
))
ComplexHeatmap::Heatmap(cm.pseudo,
name = "Expression",
show_column_names = F,
show_row_names = T,
cluster_columns = F,
show_column_dend = F,
cluster_rows = T,
top_annotation = ha,
show_row_dend = F,
column_split = colnames(cm.pseudo) %>% strsplit("!!|_") %>% sget(2) %>% unname() %>% factor(),
col=circlize::colorRamp2(c(-1, 1.2), c("white", "firebrick")),
row_km = 4)
## Warning: The input is a data frame-like object, convert it to a matrix.
c("FOSL2", "TCF4", "STAT1", "NR3C1", "ETV7", "THRB", "BPTF", "RELB", "CEBPD", "HIF1A", "ELF1", "CREM", "ELF2", "ZNF44", "CHD1", "POU2F1", "GTF2IRD1", "NFIA", "NFE2L2", "FOXP1", "FOXO3") %>%
clusterProfiler::enrichGO("org.Hs.eg.db", "SYMBOL", "BP") %>%
enrichplot::pairwise_termsim() %>%
enrichplot::treeplot() +
scale_fill_manual(values = brewer.pal(6, "Greys")[-1])
##
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
We provide the results from scDRS based on Nalls et al. GWAS summary stats.
dat.scores <- qread("gwas/nalls/PD.full_score.qs")
# Load downstream analyses for cell types versus trait
dat.ct <- read.delim("gwas/nalls/downstream_celltype.tsv", header = F, sep = "\t") %$%
strsplit(V1, " ") %>%
unlist() %>%
gsub("\\", "", ., fixed = T) %>%
gsub("\n", "", ., fixed = T) %>%
.[!sapply(., \(x) x == "")] %>%
.[c(1:5,69:72, # Header
8:12,75:78, # Astro
15:19,81:84, # EN
21:25,86:89, # Immune
28:32,92:95, # IN
34:38,97:100, # MSN
40:44,102:105, # MIC
46:50,107:110, # OPCs
52:56,112:115, # OL
58:62,117:120, # PVMs
64:68,122:125 # Peri
)]
dat.sign <- dat.ct %>%
split(seq(9)) %>%
bind_rows() %>%
{`colnames<-`(.[-1, ], .[1, ])} %>%
as.data.frame()
dat.sign %<>% mutate(group = c("Astrocytes",
"Exc. neurons",
"Immune",
"Inh. neurons",
"MSN",
"Microglia",
"OPCs",
"Oligodendrocytes",
"PVMs",
"Pericytes/endothelial"))
dat.sign %<>%
filter(!group == "Unknown") %>%
mutate(p.adj = p.adjust(assoc_mcp, method = "fdr"),
hetero.adj = p.adjust(hetero_mcp, method = "fdr")) %>%
mutate(assoc_sign = sapply(assoc_mcp, \(x) if (x <= 0.001) "***" else if (x <= 0.01 && x > 0.001) "**" else if (x <= 0.05 && x > 0.01) "*" else ""),
hetero_sign = sapply(hetero_mcp, \(x) if (x <= 0.001) "###" else if (x <= 0.01 && x > 0.001) "##" else if (x <= 0.05 && x > 0.01) "#" else " ")) %>%
mutate(sign = paste0(assoc_sign,",",hetero_sign) %>% gsub(", ", "", .)) %>%
select(group, sign) %>%
dplyr::rename(type = group)
data.frame(cell = dat.scores$X,
score = dat.scores$norm_score,
type = anno.major[match(dat.scores$X,
names(anno.major))]) %>%
group_by(type) %>%
summarize(m = median(score)) %>%
filter(!is.na(type)) %>%
mutate(type = as.character(type)) %$%
arrange(., desc(m)) %>%
mutate(type = factor(type, levels = type), sign = dat.sign$sign[match(type %>% levels, dat.sign$type)]) %>%
ggplot(aes(type, m, fill = type)) +
geom_bar(stat = "identity") +
geom_text(aes(label = sign), vjust = -0.5) +
theme_bw() +
labs(y = "Mean score", x = "", title = "scDRS mean scores and associations") +
guides(fill = "none") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
line = element_blank()) +
ylim(c(-0.6, 2)) +
scale_fill_manual(values = pal.major) +
geom_hline(yintercept = 0)
dat.ct <- read.delim("gwas/nalls/downstream_celltype_microglia.tsv", header = F, sep = "\t") %$%
strsplit(V1, " ") %>%
unlist() %>%
gsub("\\", "", ., fixed = T) %>%
gsub("\n", "", ., fixed = T) %>%
gsub("group", "", .) %>%
.[!sapply(., \(x) x == "")] %>%
{c("group", .[seq(35)])} %>%
matrix(ncol = 6, byrow = T) %>%
as.data.frame() %>%
`colnames<-`(., .[1, ]) %>%
.[-1, ]
dat.sign <-
dat.ct %>%
filter(!group == "Unknown") %>%
mutate(p.adj = p.adjust(assoc_mcp, method = "fdr"),
hetero.adj = p.adjust(hetero_mcp, method = "fdr")) %>%
mutate(assoc_sign = sapply(assoc_mcp, \(x) if (x <= 0.001) "***" else if (x <= 0.01 && x > 0.001) "**" else if (x <= 0.05 && x > 0.01) "*" else ""),
hetero_sign = sapply(hetero_mcp, \(x) if (x <= 0.001) "###" else if (x <= 0.01 && x > 0.001) "##" else if (x <= 0.05 && x > 0.01) "#" else " ")) %>%
mutate(sign = paste0(assoc_sign,",",hetero_sign) %>% gsub(", ", "", .)) %>%
select(group, sign) %>%
dplyr::rename(type = group)
data.frame(cell = dat.scores$X %>% gsub("one_", "!!", .),
score = dat.scores$norm_score,
type = anno.major[match(dat.scores$X,
names(anno.major))]) %>%
filter(cell %in% names(anno.micro)) %>%
mutate(type = factor(anno.micro[.$cell])) %>%
group_by(type) %>%
summarize(m = mean(score)) %>%
filter(!is.na(type)) %>%
mutate(type = as.character(type)) %>%
arrange(desc(m)) %>%
mutate(type = factor(type, levels = type), sign = dat.sign$sign[match(type %>% levels, dat.sign$type)]) %>%
mutate(type = factor(type, levels = type)) %>%
ggplot(aes(type, m, fill = type)) +
geom_bar(stat = "identity") +
geom_text(aes(label = sign), vjust = -0.5, nudge_y = -0.05) +
theme_bw() +
theme(line = element_blank()) +
labs(y = "Mean score", x = "", title = "scDRS for microglia") +
guides(fill = "none") +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
ylim(c(0, 2)) +
scale_fill_manual(values = pal.major)
We provide the results from scDRS based on Chia et al. GWAS summary stats.
dat.scores <- qread("gwas/chia/MSA.full_score.qs")
# Load downstream analyses for cell types versus trait
dat.ct <- read.delim("gwas/chia/downstream_celltype.tsv", header = F, sep = "\t") %$%
strsplit(V1, " ") %>%
unlist() %>%
gsub("\\", "", ., fixed = T) %>%
gsub("\n", "", ., fixed = T) %>%
.[!sapply(., \(x) x == "")] %>%
.[c(1:5,69:72, # Header
8:12,75:78, # Astro
15:19,81:84, # EN
21:25,86:89, # Immune
28:32,92:95, # IN
34:38,97:100, # MSN
40:44,102:105, # MIC
46:50,107:110, # OPCs
52:56,112:115, # OL
58:62,117:120, # PVMs
64:68,122:125 # Peri
)]
dat.sign <- dat.ct %>%
split(seq(9)) %>%
bind_rows() %>%
{`colnames<-`(.[-1, ], .[1, ])} %>%
as.data.frame()
dat.sign %<>% mutate(group = c("Astrocytes",
"Exc. neurons",
"Immune",
"Inh. neurons",
"MSN",
"Microglia",
"OPCs",
"Oligodendrocytes",
"PVMs",
"Pericytes/endothelial"))
dat.sign %<>%
filter(!group == "Unknown") %>%
mutate(assoc_sign = sapply(assoc_mcp, \(x) if (x <= 0.001) "***" else if (x <= 0.01 && x > 0.001) "**" else if (x <= 0.05 && x > 0.01) "*" else ""),
hetero_sign = sapply(hetero_mcp, \(x) if (x <= 0.001) "###" else if (x <= 0.01 && x > 0.001) "##" else if (x <= 0.05 && x > 0.01) "#" else " ")) %>%
mutate(sign = paste0(assoc_sign,",",hetero_sign) %>% gsub(",", "", .)) %>%
select(group, sign) %>%
dplyr::rename(type = group)
data.frame(cell = dat.scores$X,
score = dat.scores$norm_score,
type = anno.major[match(dat.scores$X,
names(anno.major))]) %>%
group_by(type) %>%
summarize(m = median(score)) %>%
filter(!is.na(type)) %>%
mutate(type = as.character(type)) %$%
arrange(., desc(m)) %>%
mutate(type = factor(type, levels = type), sign = dat.sign$sign[match(type %>% levels, dat.sign$type)]) %>%
ggplot(aes(type, m, fill = type)) +
geom_bar(stat = "identity") +
geom_text(aes(label = sign), vjust = 1.5) +
theme_bw() +
labs(y = "Mean score", x = "", title = "scDRS mean scores and associations") +
guides(fill = "none") +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
line = element_blank()) +
ylim(c(-0.5, 0.5)) +
scale_fill_manual(values = pal.major) +
geom_hline(yintercept = 0)
All subfigures were made in GraphPad Prism. Data are available in
Phagocytosis_assay_triplicates_raw_data_HH.tsv (Copenhagen
experiment) or HOUSTON.tsv (Houston experiment).
Time to knit
Sys.time() - tt
## Time difference of 16.95721 mins
sessionInfo()
## R version 4.4.2 (2024-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.1 LTS
##
## Matrix products: default
## BLAS/LAPACK: /opt/intel/oneapi/mkl/2025.0/lib/libmkl_gf_lp64.so.2; LAPACK version 3.11.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Berlin
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 grid stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] DOSE_3.30.5 slingshot_2.12.0
## [3] TrajectoryUtils_1.12.0 SingleCellExperiment_1.26.0
## [5] SummarizedExperiment_1.34.0 Biobase_2.64.0
## [7] GenomicRanges_1.56.1 GenomeInfoDb_1.40.1
## [9] IRanges_2.38.1 S4Vectors_0.42.1
## [11] BiocGenerics_0.50.0 MatrixGenerics_1.16.0
## [13] matrixStats_1.4.1 princurve_2.1.6
## [15] rstatix_0.7.2 stringr_1.5.1
## [17] ComplexHeatmap_2.20.0 circlize_0.4.16
## [19] ggrepel_0.9.6 CRMetrics_0.3.0
## [21] RColorBrewer_1.1-3 ggpmisc_0.6.0
## [23] ggpp_0.5.8-1 ggforce_0.4.2
## [25] reshape2_1.4.4 STRINGdb_2.16.4
## [27] ggpubr_0.6.0 cowplot_1.1.3
## [29] ggsci_3.2.0 ggplot2_3.5.1
## [31] qs_0.27.2 scHelper_0.0.4
## [33] sccore_1.0.5 cacoa_0.4.0
## [35] dplyr_1.1.4 magrittr_2.0.3
## [37] conos_1.5.2 igraph_2.0.3
## [39] Matrix_1.7-0
##
## loaded via a namespace (and not attached):
## [1] TSCAN_1.42.0 fs_1.6.4
## [3] bitops_1.0-8 enrichplot_1.24.4
## [5] httr_1.4.7 doParallel_1.0.17
## [7] tools_4.4.2 backports_1.5.0
## [9] utf8_1.2.4 R6_2.5.1
## [11] lazyeval_0.2.2 uwot_0.2.2
## [13] mgcv_1.9-1 GetoptLong_1.0.5
## [15] withr_3.0.1 gridExtra_2.3
## [17] quantreg_5.98 cli_3.6.3
## [19] Cairo_1.6-2 TSP_1.2-4
## [21] scatterpie_0.2.4 labeling_0.4.3
## [23] sass_0.4.9 pbapply_1.7-2
## [25] yulab.utils_0.1.7 gson_0.1.0
## [27] R.utils_2.12.3 plotrix_3.8-4
## [29] limma_3.60.5 rstudioapi_0.16.0
## [31] RSQLite_2.3.7 gridGraphics_0.5-1
## [33] generics_0.1.3 shape_1.4.6.1
## [35] RApiSerialize_0.1.4 combinat_0.0-8
## [37] gtools_3.9.5 car_3.1-3
## [39] GO.db_3.19.1 ggbeeswarm_0.7.2
## [41] fansi_1.0.6 abind_1.4-8
## [43] R.methodsS3_1.8.2 lifecycle_1.0.4
## [45] edgeR_4.2.1 yaml_2.3.10
## [47] carData_3.0-5 gplots_3.1.3.1
## [49] qvalue_2.36.0 SparseArray_1.4.8
## [51] Rtsne_0.17 blob_1.2.4
## [53] promises_1.3.0 crayon_1.5.3
## [55] lattice_0.22-6 KEGGREST_1.44.1
## [57] pillar_1.9.0 knitr_1.48
## [59] fgsea_1.30.0 rjson_0.2.23
## [61] codetools_0.2-20 fastmatch_1.1-4
## [63] glue_1.8.0 drat_0.2.4
## [65] ggfun_0.1.6 data.table_1.16.0
## [67] treeio_1.28.0 vctrs_0.6.5
## [69] png_0.1-8 urltools_1.7.3
## [71] gtable_0.3.5 gsubfn_0.7
## [73] cachem_1.1.0 dendsort_0.3.4
## [75] xfun_0.47 S4Arrays_1.4.1
## [77] mime_0.12 tidygraph_1.3.1
## [79] survival_3.7-0 seriation_1.5.6
## [81] iterators_1.0.14 pbmcapply_1.5.1
## [83] N2R_1.0.3 fastICA_1.2-5.1
## [85] Rook_1.2 statmod_1.5.0
## [87] brew_1.0-10 nlme_3.1-166
## [89] ggtree_3.12.0 tradeSeq_1.18.0
## [91] bit64_4.5.2 bslib_0.8.0
## [93] irlba_2.3.5.1 vipor_0.4.7
## [95] KernSmooth_2.23-24 FSA_0.9.5
## [97] colorspace_2.1-1 DBI_1.2.3
## [99] ggrastr_1.0.2 DESeq2_1.44.0
## [101] tidyselect_1.2.1 bit_4.5.0
## [103] compiler_4.4.2 chron_2.3-61
## [105] httr2_1.0.5 SparseM_1.84-2
## [107] pagoda2_1.0.12 DelayedArray_0.30.1
## [109] shadowtext_0.1.4 stringfish_0.16.0
## [111] triebeard_0.4.1 scales_1.3.0
## [113] caTools_1.18.3 rappdirs_0.3.3
## [115] digest_0.6.37 rmarkdown_2.28
## [117] ca_0.71.1 XVector_0.44.0
## [119] htmltools_0.5.8.1 pkgconfig_2.0.3
## [121] sparseMatrixStats_1.16.0 dunn.test_1.3.6
## [123] highr_0.11 fastmap_1.2.0
## [125] rlang_1.1.4 GlobalOptions_0.1.2
## [127] UCSC.utils_1.0.0 DelayedMatrixStats_1.26.0
## [129] shiny_1.9.1 farver_2.1.2
## [131] jquerylib_0.1.4 jsonlite_1.8.9
## [133] BiocParallel_1.38.0 mclust_6.1.1
## [135] GOSemSim_2.30.2 R.oo_1.26.0
## [137] confintr_1.0.2 ggplotify_0.1.2
## [139] polynom_1.4-1 Formula_1.2-5
## [141] GenomeInfoDbData_1.2.12 patchwork_1.3.0
## [143] munsell_0.5.1 Rcpp_1.0.13
## [145] ggnewscale_0.5.0 viridis_0.6.5
## [147] ape_5.8 proto_1.0.0
## [149] sqldf_0.4-11 leidenAlg_1.1.3
## [151] stringi_1.8.4 ggraph_2.2.1
## [153] zlibbioc_1.50.0 MASS_7.3-61
## [155] org.Hs.eg.db_3.19.1 plyr_1.8.9
## [157] parallel_4.4.2 graphlayouts_1.2.0
## [159] Biostrings_2.72.1 splines_4.4.2
## [161] hash_2.2.6.3 locfit_1.5-9.10
## [163] ggsignif_0.6.4 evaluate_1.0.0
## [165] RcppParallel_5.1.9 foreach_1.5.2
## [167] tweenr_2.0.3 httpuv_1.6.15
## [169] MatrixModels_0.5-3 tidyr_1.3.1
## [171] RMTstat_0.3.1 purrr_1.0.2
## [173] polyclip_1.10-7 clue_0.3-65
## [175] broom_1.0.7 xtable_1.8-4
## [177] tidytree_0.4.6 later_1.3.2
## [179] viridisLite_0.4.2 tibble_3.2.1
## [181] clusterProfiler_4.12.6 aplot_0.2.3
## [183] registry_0.5-1 memoise_2.0.1
## [185] beeswarm_0.4.0 AnnotationDbi_1.66.0
## [187] cluster_2.1.6